[llvm-branch-commits] [llvm] [AMDGPU] Set MONonVolatile on memory accesses for spills (PR #179414)

Pierre van Houtryve via llvm-branch-commits llvm-branch-commits at lists.llvm.org
Tue Feb 3 05:45:59 PST 2026


https://github.com/Pierre-vh updated https://github.com/llvm/llvm-project/pull/179414

>From ac3c1eb25e7d8db5821e88ddb24ad0f3f11d9479 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Fri, 23 Jan 2026 14:40:27 +0100
Subject: [PATCH 1/2] [AMDGPU] Set MONonVolatile on memory accesses for spills

Mark the memory operands of spill loads and stores as non-volatile, so that
these loads and stores are emitted with `nv` set.

The reason is that scratch memory used by spills will never be shared with
another thread. It is purely thread-local and thus a good fit for the `nv` bit.
---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |     8 +-
 .../AMDGPU/accvgpr-spill-scc-clobber.mir      |  8628 ++++++------
 llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir  |    16 +-
 .../av-spill-expansion-with-machine-cp.mir    |     8 +-
 .../AMDGPU/av_spill_cross_bb_usage.mir        |    68 +-
 llvm/test/CodeGen/AMDGPU/bf16.ll              |    24 +-
 .../AMDGPU/branch-relax-indirect-branch.mir   |     8 +-
 .../AMDGPU/branch-relax-no-terminators.mir    |     8 +-
 .../CodeGen/AMDGPU/bug-undef-spilled-agpr.mir |     8 +-
 .../eliminate-frame-index-s-mov-b32.mir       |   332 +-
 .../eliminate-frame-index-v-add-co-u32.mir    |    36 +-
 llvm/test/CodeGen/AMDGPU/frame-index.mir      |   280 +-
 .../CodeGen/AMDGPU/insert-waitcnts-crash.ll   |     4 +-
 ...egrewriter-live-out-undef-subregisters.mir |     4 +-
 .../AMDGPU/kernel-mubuf-with-voffset.mir      |     2 +-
 .../AMDGPU/memory-legalizer-buffer-atomics.ll |     8 +-
 ...al-regcopy-and-spill-missed-at-regalloc.ll |     4 +-
 .../AMDGPU/pei-amdgpu-cs-chain-preserve.mir   |    12 +-
 .../CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir    |     8 +-
 .../CodeGen/AMDGPU/pei-build-av-spill.mir     |  2520 ++--
 .../AMDGPU/pei-build-spill-partial-agpr.mir   |   124 +-
 llvm/test/CodeGen/AMDGPU/pei-build-spill.mir  |  1804 +--
 .../AMDGPU/pei-reg-scavenger-position.mir     |     4 +-
 .../AMDGPU/pei-scavenge-vgpr-spill.mir        |    18 +-
 .../AMDGPU/pei-vgpr-block-spill-csr.mir       |    68 +-
 .../regalloc-introduces-copy-sgpr-to-agpr.mir |   136 +-
 .../CodeGen/AMDGPU/same-slot-agpr-sgpr.mir    |    10 +-
 .../AMDGPU/sgpr-spill-overlap-wwm-reserve.mir |    20 +-
 .../AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir |   104 +-
 .../AMDGPU/sgpr-spill-vmem-large-frame.mir    |    12 +-
 llvm/test/CodeGen/AMDGPU/sgpr-spill.mir       |   360 +-
 .../AMDGPU/spill-agpr-partially-undef.mir     |    12 +-
 llvm/test/CodeGen/AMDGPU/spill-agpr.mir       |     8 +-
 .../AMDGPU/spill-reg-tuple-super-reg-use.mir  |    24 +-
 .../AMDGPU/spill-restore-partial-copy.mir     |     8 +-
 .../CodeGen/AMDGPU/spill-special-sgpr.mir     |    54 +-
 .../CodeGen/AMDGPU/spill-to-agpr-partial.mir  |    24 +-
 llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir   |    48 +-
 llvm/test/CodeGen/AMDGPU/spillv16.ll          |    56 +-
 llvm/test/CodeGen/AMDGPU/spillv16.mir         |    16 +-
 ...d-op-for-wwm-scratch-reg-spill-restore.mir |    20 +-
 ...tor-spill-restore-to-other-vector-type.mir |    24 +-
 .../CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir |   224 +-
 llvm/test/CodeGen/AMDGPU/vgpr-spill.mir       |    28 +-
 .../wait-xcnt-atomic-rmw-optimization.ll      |    26 +-
 .../AMDGPU/whole-wave-functions-pei.mir       |    90 +-
 .../CodeGen/AMDGPU/whole-wave-functions.ll    | 11056 ++++++++--------
 47 files changed, 13183 insertions(+), 13181 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 96c2f6530fe4c..1e7ec1832db25 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1836,10 +1836,12 @@ void SIRegisterInfo::buildSpillLoadStore(
       IsKill = false;
     }
 
+    // Create the MMO, additionally setting the NonVolatile flag as scratch
+    // memory used for spills will not be used outside the thread.
     MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RegOffset);
-    MachineMemOperand *NewMMO =
-        MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
-                                 commonAlignment(Alignment, RegOffset));
+    MachineMemOperand *NewMMO = MF->getMachineMemOperand(
+        PInfo, MMO->getFlags() | MONonVolatile, RemEltSize,
+        commonAlignment(Alignment, RegOffset));
 
     auto MIB =
         BuildMI(MBB, MI, DL, *Desc)
diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
index c1617574becc3..bc2446bc40cf4 100644
--- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
@@ -27,11 +27,11 @@ body:             |
   ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX908-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
   ; GFX908-NEXT:   $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.1:
@@ -97,187 +97,187 @@ body:             |
   ; GFX90A-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -289,182 +289,182 @@ body:             |
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
-  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -521,12 +521,12 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.1:
@@ -592,188 +592,188 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   $agpr0 = SCRATCH_LOAD_DWORD killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr0 = SCRATCH_LOAD_DWORD killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:
@@ -785,182 +785,182 @@ body:             |
   ; GFX90A-FLATSCR-NEXT: bb.2:
   ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
-  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -1045,13 +1045,13 @@ body:             |
   ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX908-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
   ; GFX908-NEXT:   $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
-  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
+  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
   ; GFX908-NEXT:   $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.1:
@@ -1117,188 +1117,188 @@ body:             |
   ; GFX90A-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   $agpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   $agpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -1310,182 +1310,182 @@ body:             |
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
-  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -1542,14 +1542,14 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
-  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1 + 4, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.1:
@@ -1615,188 +1615,188 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.1, align 4, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.1, align 4, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:
@@ -1808,182 +1808,182 @@ body:             |
   ; GFX90A-FLATSCR-NEXT: bb.2:
   ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
-  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -2068,15 +2068,15 @@ body:             |
   ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX908-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
   ; GFX908-NEXT:   $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
-  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
+  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
   ; GFX908-NEXT:   $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 8, addrspace 5)
+  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 8, addrspace 5)
   ; GFX908-NEXT:   $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.1:
@@ -2142,189 +2142,189 @@ body:             |
   ; GFX90A-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   $agpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
-  ; GFX90A-NEXT:   $agpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 8, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   $agpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
+  ; GFX90A-NEXT:   $agpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 8, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -2336,182 +2336,182 @@ body:             |
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
-  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -2568,16 +2568,16 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
-  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1 + 4, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1 + 8, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 8, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.1:
@@ -2643,188 +2643,188 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.1, align 4, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.1, align 4, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:
@@ -2836,182 +2836,182 @@ body:             |
   ; GFX90A-FLATSCR-NEXT: bb.2:
   ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
-  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -3096,11 +3096,11 @@ body:             |
   ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX908-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; GFX908-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.1:
@@ -3166,187 +3166,187 @@ body:             |
   ; GFX90A-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -3358,182 +3358,182 @@ body:             |
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   liveins: $agpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
-  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -3590,12 +3590,12 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.1:
@@ -3661,188 +3661,188 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $agpr0, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $agpr0, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:
@@ -3854,182 +3854,182 @@ body:             |
   ; GFX90A-FLATSCR-NEXT: bb.2:
   ; GFX90A-FLATSCR-NEXT:   liveins: $agpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
-  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -4113,13 +4113,13 @@ body:             |
   ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX908-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; GFX908-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
   ; GFX908-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5)
-  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
+  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.1:
@@ -4185,188 +4185,188 @@ body:             |
   ; GFX90A-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr1, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr1, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -4378,182 +4378,182 @@ body:             |
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
-  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -4610,14 +4610,14 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store (s32) into %stack.1, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store (s32) into %stack.1 + 4, addrspace 5)
-  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.1:
@@ -4683,188 +4683,188 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $agpr0_agpr1, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.1, align 4, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $agpr0_agpr1, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.1, align 4, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:
@@ -4876,182 +4876,182 @@ body:             |
   ; GFX90A-FLATSCR-NEXT: bb.2:
   ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
-  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -5134,15 +5134,15 @@ body:             |
   ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX908-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; GFX908-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
   ; GFX908-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1 + 4, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
   ; GFX908-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5)
-  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 8, addrspace 5)
+  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.1:
@@ -5208,189 +5208,189 @@ body:             |
   ; GFX90A-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr1, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1 + 4, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr2, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr1, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr2, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 8, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -5402,182 +5402,182 @@ body:             |
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
-  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: (load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: (load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: (load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: (load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: (load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: (load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: (load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: (load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: (load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: (load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: (load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: (load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: (load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: (load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: (load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: (load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: (load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: (load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: (load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: (load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: (load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: (load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: (load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: (load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: (load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: (load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: (load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: (load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: (load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: (load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: (load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: (load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: (load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: (load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: (load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: (load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: (load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: (load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: (load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: (load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: (load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: (load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: (load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: (load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: (load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: (load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: (load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: (load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: (load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: (load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: (load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: (load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: (load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: (load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: (load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: (load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: (load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: (load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: (load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: (load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: (load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: (load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: (load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: (load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: (load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: (load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: (load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: (load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: (load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: (load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: (load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: (load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: (load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: (load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: (load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: (load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: (load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: (load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: (load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: (load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: (load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: (load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: (load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: (load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: (load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: (load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: (load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: (load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: (load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: (load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: (load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: (load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: (load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: (load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: (load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: (load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: (load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: (load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: (load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: (load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: (load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: (load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: (load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: (load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: (load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: (load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: (load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: (load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: (load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: (load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: (load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: (load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: (load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: (load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: (load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: (load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: (load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: (load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: (load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: (load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: (load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: (load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: (load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: (load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: (load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: (load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: (load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: (load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: (load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: (load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: (load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: (load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: (load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: (load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: (load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: (load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: (load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: (load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: (load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: (load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: (load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: (load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: (load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -5634,16 +5634,16 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1 + 4, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.1 + 8, addrspace 5)
-  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 8, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.1:
@@ -5709,188 +5709,188 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX3 $agpr0_agpr1_agpr2, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.1, align 4, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX3 $agpr0_agpr1_agpr2, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.1, align 4, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:
@@ -5902,182 +5902,182 @@ body:             |
   ; GFX90A-FLATSCR-NEXT: bb.2:
   ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
-  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir b/llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir
index 47d489b7f35ca..fd0556d856c3d 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir
@@ -21,18 +21,18 @@ body:             |
     ; GFX942-NEXT: renamable $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27 = IMPLICIT_DEF
     ; GFX942-NEXT: renamable $agpr28_agpr29 = IMPLICIT_DEF
     ; GFX942-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
     ; GFX942-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX942-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX942-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr12_vgpr13, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s64) into %stack.0 + 48, align 4, addrspace 5)
-    ; GFX942-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; GFX942-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; GFX942-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; GFX942-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr12_vgpr13, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s64) into %stack.0 + 48, align 4, addrspace 5)
+    ; GFX942-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; GFX942-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; GFX942-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
     ; GFX942-NEXT: $agpr15 = COPY $agpr30, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
     ; GFX942-NEXT: $agpr14 = COPY $agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX942-NEXT: $agpr12_agpr13 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0 + 48, align 4, addrspace 5)
+    ; GFX942-NEXT: $agpr12_agpr13 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0 + 48, align 4, addrspace 5)
     ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
     renamable $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27 = IMPLICIT_DEF
     renamable $agpr28_agpr29 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir b/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir
index dfe4b8a33f396..59d3c0140110e 100644
--- a/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir
+++ b/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir
@@ -70,9 +70,9 @@ body: |
     ; GFX908-PEI-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = IMPLICIT_DEF
     ; GFX908-PEI-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = IMPLICIT_DEF
     ; GFX908-PEI-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-    ; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
+    ; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; GFX908-PEI-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
-    ; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; GFX908-PEI-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
     ; GFX908-PEI-NEXT: S_ENDPGM 0
     ;
@@ -86,9 +86,9 @@ body: |
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = IMPLICIT_DEF
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = IMPLICIT_DEF
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-    ; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
+    ; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
-    ; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
     ; GFX908-PEI-MACHINECP-NEXT: S_ENDPGM 0
     renamable $agpr0 = COPY renamable $vgpr0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
index a2ec87053a8d5..0cf5fe03c09ea 100644
--- a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
+++ b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
@@ -28,20 +28,20 @@ body:             |
   ; GCN-NEXT:   liveins: $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
   ; GCN-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
   ; GCN-NEXT:   renamable $vgpr44 = COPY $vgpr13, implicit $exec
   ; GCN-NEXT:   renamable $vgpr43 = COPY $vgpr12, implicit $exec
   ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit undef $scc
@@ -64,17 +64,17 @@ body:             |
   ; GCN-NEXT:   renamable $sgpr16_sgpr17 = IMPLICIT_DEF
   ; GCN-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
   ; GCN-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15, implicit $vgpr14_vgpr15 :: (store (s32) into %stack.1, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec, implicit killed $vgpr14_vgpr15 :: (store (s32) into %stack.1 + 4, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr10_vgpr11, implicit $vgpr10_vgpr11 :: (store (s32) into %stack.2, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec, implicit killed $vgpr10_vgpr11 :: (store (s32) into %stack.2 + 4, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15, implicit $vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec, implicit killed $vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr10_vgpr11, implicit $vgpr10_vgpr11 :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec, implicit killed $vgpr10_vgpr11 :: ("amdgpu-non-volatile" store (s32) into %stack.2 + 4, addrspace 5)
   ; GCN-NEXT:   dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0
-  ; GCN-NEXT:   $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15 :: (load (s32) from %stack.1, addrspace 5)
-  ; GCN-NEXT:   $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
+  ; GCN-NEXT:   $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; GCN-NEXT:   $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
   ; GCN-NEXT:   renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, killed $vgpr45_vgpr46, 0, killed $vgpr41_vgpr42, 0, killed $vgpr60_vgpr61, 0, 0, implicit $mode, implicit $exec
   ; GCN-NEXT:   FLAT_STORE_DWORDX2 killed renamable $vgpr58_vgpr59, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
-  ; GCN-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.2, addrspace 5)
-  ; GCN-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (load (s32) from %stack.2 + 4, addrspace 5)
+  ; GCN-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GCN-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2 + 4, addrspace 5)
   ; GCN-NEXT:   FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr56_vgpr57, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.2:
@@ -83,20 +83,20 @@ body:             |
   ; GCN-NEXT:   renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 0, implicit $exec
   ; GCN-NEXT:   FLAT_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr43_vgpr44, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
   ; GCN-NEXT:   FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr14_vgpr15, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
-  ; GCN-NEXT:   $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
-  ; GCN-NEXT:   $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
-  ; GCN-NEXT:   $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
-  ; GCN-NEXT:   $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
-  ; GCN-NEXT:   $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
-  ; GCN-NEXT:   $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
-  ; GCN-NEXT:   $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
-  ; GCN-NEXT:   $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
-  ; GCN-NEXT:   $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-  ; GCN-NEXT:   $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
-  ; GCN-NEXT:   $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-  ; GCN-NEXT:   $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+  ; GCN-NEXT:   $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
+  ; GCN-NEXT:   $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
+  ; GCN-NEXT:   $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
+  ; GCN-NEXT:   $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
+  ; GCN-NEXT:   $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
+  ; GCN-NEXT:   $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
+  ; GCN-NEXT:   $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+  ; GCN-NEXT:   $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
+  ; GCN-NEXT:   $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
+  ; GCN-NEXT:   $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
+  ; GCN-NEXT:   $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
+  ; GCN-NEXT:   $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
   ; GCN-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; GCN-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; GCN-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
   ; GCN-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
   ; GCN-NEXT:   S_SETPC_B64_return undef $sgpr30_sgpr31
   bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/bf16.ll b/llvm/test/CodeGen/AMDGPU/bf16.ll
index a5763816e58cc..1a841250c09f9 100644
--- a/llvm/test/CodeGen/AMDGPU/bf16.ll
+++ b/llvm/test/CodeGen/AMDGPU/bf16.ll
@@ -3508,7 +3508,7 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) {
 ; GFX1250-NEXT:    s_mov_b32 s2, s33
 ; GFX1250-NEXT:    s_mov_b32 s33, s32
 ; GFX1250-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX1250-NEXT:    scratch_store_b32 off, v4, s33 ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v4, s33 nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX1250-NEXT:    s_get_pc_i64 s[0:1]
@@ -3526,7 +3526,7 @@ define void @test_call(bfloat %in, ptr addrspace(5) %out) {
 ; GFX1250-NEXT:    s_mov_b32 s32, s33
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX1250-NEXT:    scratch_load_b32 v4, off, s33 ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v4, off, s33 nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX1250-NEXT:    s_mov_b32 s33, s2
@@ -3752,7 +3752,7 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) {
 ; GFX1250-NEXT:    s_mov_b32 s2, s33
 ; GFX1250-NEXT:    s_mov_b32 s33, s32
 ; GFX1250-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX1250-NEXT:    scratch_store_b32 off, v4, s33 ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v4, s33 nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX1250-NEXT:    s_get_pc_i64 s[0:1]
@@ -3770,7 +3770,7 @@ define void @test_call_v2bf16(<2 x bfloat> %in, ptr addrspace(5) %out) {
 ; GFX1250-NEXT:    s_mov_b32 s32, s33
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX1250-NEXT:    scratch_load_b32 v4, off, s33 ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v4, off, s33 nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX1250-NEXT:    s_mov_b32 s33, s2
@@ -4016,7 +4016,7 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) {
 ; GFX1250-NEXT:    s_mov_b32 s2, s33
 ; GFX1250-NEXT:    s_mov_b32 s33, s32
 ; GFX1250-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX1250-NEXT:    scratch_store_b32 off, v5, s33 ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v5, s33 nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX1250-NEXT:    s_get_pc_i64 s[0:1]
@@ -4038,7 +4038,7 @@ define void @test_call_v3bf16(<3 x bfloat> %in, ptr addrspace(5) %out) {
 ; GFX1250-NEXT:    s_mov_b32 s32, s33
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX1250-NEXT:    scratch_load_b32 v5, off, s33 ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v5, off, s33 nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX1250-NEXT:    s_mov_b32 s33, s2
@@ -4294,7 +4294,7 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) {
 ; GFX1250-NEXT:    s_mov_b32 s2, s33
 ; GFX1250-NEXT:    s_mov_b32 s33, s32
 ; GFX1250-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX1250-NEXT:    scratch_store_b32 off, v5, s33 ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v5, s33 nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX1250-NEXT:    s_get_pc_i64 s[0:1]
@@ -4313,7 +4313,7 @@ define void @test_call_v4bf16(<4 x bfloat> %in, ptr addrspace(5) %out) {
 ; GFX1250-NEXT:    s_mov_b32 s32, s33
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX1250-NEXT:    scratch_load_b32 v5, off, s33 ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v5, off, s33 nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX1250-NEXT:    s_mov_b32 s33, s2
@@ -4610,7 +4610,7 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) {
 ; GFX1250-NEXT:    s_mov_b32 s2, s33
 ; GFX1250-NEXT:    s_mov_b32 s33, s32
 ; GFX1250-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX1250-NEXT:    scratch_store_b32 off, v5, s33 ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v5, s33 nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX1250-NEXT:    s_get_pc_i64 s[0:1]
@@ -4628,7 +4628,7 @@ define void @test_call_v8bf16(<8 x bfloat> %in, ptr addrspace(5) %out) {
 ; GFX1250-NEXT:    s_mov_b32 s32, s33
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX1250-NEXT:    scratch_load_b32 v5, off, s33 ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v5, off, s33 nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX1250-NEXT:    s_mov_b32 s33, s2
@@ -5013,7 +5013,7 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) {
 ; GFX1250-NEXT:    s_mov_b32 s2, s33
 ; GFX1250-NEXT:    s_mov_b32 s33, s32
 ; GFX1250-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX1250-NEXT:    scratch_store_b32 off, v9, s33 ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v9, s33 nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX1250-NEXT:    s_get_pc_i64 s[0:1]
@@ -5034,7 +5034,7 @@ define void @test_call_v16bf16(<16 x bfloat> %in, ptr addrspace(5) %out) {
 ; GFX1250-NEXT:    s_mov_b32 s32, s33
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX1250-NEXT:    scratch_load_b32 v9, off, s33 ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v9, off, s33 nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX1250-NEXT:    s_mov_b32 s33, s2
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir b/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir
index 34c0159dd3ddb..aa7744f44965c 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir
+++ b/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir
@@ -35,7 +35,7 @@ body:             |
   ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
   ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1
   ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1
   ; CHECK-NEXT:   $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
@@ -61,7 +61,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
   ; CHECK-NEXT:   $sgpr1 = V_READLANE_B32 killed $vgpr2, 1
-  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
@@ -77,7 +77,7 @@ body:             |
   ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
   ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1
   ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1
   ; CHECK-NEXT:   $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
@@ -103,7 +103,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
   ; CHECK-NEXT:   $sgpr1 = V_READLANE_B32 killed $vgpr2, 1
-  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4:
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir b/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir
index 4cf92b0127131..fcf89426c4075 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir
+++ b/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir
@@ -36,7 +36,7 @@ body:             |
   ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
   ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1
   ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1
   ; CHECK-NEXT:   $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
@@ -62,7 +62,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
   ; CHECK-NEXT:   $sgpr1 = V_READLANE_B32 killed $vgpr2, 1
-  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
@@ -78,7 +78,7 @@ body:             |
   ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
   ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1
   ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1
   ; CHECK-NEXT:   $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
@@ -105,7 +105,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
   ; CHECK-NEXT:   $sgpr1 = V_READLANE_B32 killed $vgpr2, 1
-  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4:
diff --git a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir
index 7336a54ae42db..229f614cc56d1 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir
@@ -21,9 +21,9 @@ body:             |
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
   ; GCN-NEXT:   $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GCN-NEXT:   $exec = S_MOV_B64 -1
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
   ; GCN-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
   ; GCN-NEXT:   renamable $vgpr62 = IMPLICIT_DEF
   ; GCN-NEXT:   $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, killed $vgpr62
@@ -59,10 +59,10 @@ body:             |
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.4:
   ; GCN-NEXT:   $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; GCN-NEXT:   $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; GCN-NEXT:   $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GCN-NEXT:   $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
   ; GCN-NEXT:   $exec = S_MOV_B64 -1
-  ; GCN-NEXT:   $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; GCN-NEXT:   $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
   ; GCN-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
   ; GCN-NEXT:   SI_RETURN
   bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
index 7f370b2cca658..17be1db7db814 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
@@ -300,22 +300,22 @@ body:   |
     ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
     ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -326,12 +326,12 @@ body:   |
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX8-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
     ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.18, addrspace 5)
     ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX8-NEXT: $sgpr4 = S_MOV_B32 128
     ; GFX8-NEXT: $vgpr1, dead $sgpr6_sgpr7 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec
     ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
-    ; GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+    ; GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.18, addrspace 5)
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -340,43 +340,43 @@ body:   |
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
-    ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
-    ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
-    ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
-    ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
-    ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
-    ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
-    ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
-    ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
-    ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
-    ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
-    ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
+    ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
+    ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
+    ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
+    ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
+    ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
+    ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
+    ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
+    ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
+    ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
+    ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
+    ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
+    ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+    ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
     ;
     ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
     ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX900-NEXT: {{  $}}
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -387,11 +387,11 @@ body:   |
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX900-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
     ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.18, addrspace 5)
     ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
     ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
-    ; GFX900-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+    ; GFX900-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.18, addrspace 5)
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -400,22 +400,22 @@ body:   |
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
-    ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
-    ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
-    ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
-    ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
-    ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
-    ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
-    ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
-    ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
-    ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
-    ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
-    ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
+    ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
+    ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
+    ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
+    ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
+    ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
+    ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
+    ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
+    ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
+    ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
+    ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
+    ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
+    ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+    ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
     ;
     ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
@@ -447,11 +447,11 @@ body:   |
     ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX90A-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
     ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.18, addrspace 5)
     ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 64, killed $vgpr1, implicit $exec
     ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
-    ; GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+    ; GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.18, addrspace 5)
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -481,22 +481,22 @@ body:   |
     ; GFX1010-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
     ; GFX1010: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX1010-NEXT: {{  $}}
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
     ; GFX1010-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX1010-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX1010-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -518,43 +518,43 @@ body:   |
     ; GFX1010-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX1010-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX1010-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX1010-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
-    ; GFX1010-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
-    ; GFX1010-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
-    ; GFX1010-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
-    ; GFX1010-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
-    ; GFX1010-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
-    ; GFX1010-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
-    ; GFX1010-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
-    ; GFX1010-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
-    ; GFX1010-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
-    ; GFX1010-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GFX1010-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
-    ; GFX1010-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GFX1010-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GFX1010-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GFX1010-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX1010-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
+    ; GFX1010-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
+    ; GFX1010-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
+    ; GFX1010-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
+    ; GFX1010-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
+    ; GFX1010-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
+    ; GFX1010-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
+    ; GFX1010-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
+    ; GFX1010-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GFX1010-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
+    ; GFX1010-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
+    ; GFX1010-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
+    ; GFX1010-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
+    ; GFX1010-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+    ; GFX1010-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; GFX1010-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; GFX1010-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc_lo
     ;
     ; GFX1100-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
     ; GFX1100: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX1100-NEXT: {{  $}}
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr43, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr45, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr46, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr47, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr56, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.10, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr57, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.11, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr58, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.12, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr59, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.13, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr60, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.14, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr61, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.15, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr62, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.16, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr63, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr43, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr45, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr46, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr47, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr56, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr57, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr58, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr59, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr60, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr61, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr62, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr63, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
     ; GFX1100-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX1100-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX1100-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -577,43 +577,43 @@ body:   |
     ; GFX1100-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX1100-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX1100-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX1100-NEXT: $vgpr63 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.17, addrspace 5)
-    ; GFX1100-NEXT: $vgpr62 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.16, addrspace 5)
-    ; GFX1100-NEXT: $vgpr61 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.15, addrspace 5)
-    ; GFX1100-NEXT: $vgpr60 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.14, addrspace 5)
-    ; GFX1100-NEXT: $vgpr59 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.13, addrspace 5)
-    ; GFX1100-NEXT: $vgpr58 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.12, addrspace 5)
-    ; GFX1100-NEXT: $vgpr57 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.11, addrspace 5)
-    ; GFX1100-NEXT: $vgpr56 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.10, addrspace 5)
-    ; GFX1100-NEXT: $vgpr47 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
-    ; GFX1100-NEXT: $vgpr46 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
-    ; GFX1100-NEXT: $vgpr45 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.7, addrspace 5)
-    ; GFX1100-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.6, addrspace 5)
-    ; GFX1100-NEXT: $vgpr43 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.5, addrspace 5)
-    ; GFX1100-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5)
-    ; GFX1100-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
-    ; GFX1100-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX1100-NEXT: $vgpr63 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
+    ; GFX1100-NEXT: $vgpr62 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
+    ; GFX1100-NEXT: $vgpr61 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
+    ; GFX1100-NEXT: $vgpr60 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
+    ; GFX1100-NEXT: $vgpr59 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
+    ; GFX1100-NEXT: $vgpr58 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
+    ; GFX1100-NEXT: $vgpr57 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
+    ; GFX1100-NEXT: $vgpr56 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
+    ; GFX1100-NEXT: $vgpr47 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GFX1100-NEXT: $vgpr46 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
+    ; GFX1100-NEXT: $vgpr45 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
+    ; GFX1100-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
+    ; GFX1100-NEXT: $vgpr43 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
+    ; GFX1100-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+    ; GFX1100-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; GFX1100-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; GFX1100-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc_lo
     ;
     ; GFX1200-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
     ; GFX1200: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX1200-NEXT: {{  $}}
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr43, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr45, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr46, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr47, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr56, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.10, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr57, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.11, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr58, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.12, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr59, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.13, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr60, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.14, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr61, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.15, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr62, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.16, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr63, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr43, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr45, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr46, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr47, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr56, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr57, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr58, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr59, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr60, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr61, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr62, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr63, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
     ; GFX1200-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX1200-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX1200-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -636,22 +636,22 @@ body:   |
     ; GFX1200-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX1200-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX1200-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX1200-NEXT: $vgpr63 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.17, addrspace 5)
-    ; GFX1200-NEXT: $vgpr62 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.16, addrspace 5)
-    ; GFX1200-NEXT: $vgpr61 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.15, addrspace 5)
-    ; GFX1200-NEXT: $vgpr60 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.14, addrspace 5)
-    ; GFX1200-NEXT: $vgpr59 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.13, addrspace 5)
-    ; GFX1200-NEXT: $vgpr58 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.12, addrspace 5)
-    ; GFX1200-NEXT: $vgpr57 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.11, addrspace 5)
-    ; GFX1200-NEXT: $vgpr56 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.10, addrspace 5)
-    ; GFX1200-NEXT: $vgpr47 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
-    ; GFX1200-NEXT: $vgpr46 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, addrspace 5)
-    ; GFX1200-NEXT: $vgpr45 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.7, addrspace 5)
-    ; GFX1200-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.6, addrspace 5)
-    ; GFX1200-NEXT: $vgpr43 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.5, addrspace 5)
-    ; GFX1200-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5)
-    ; GFX1200-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
-    ; GFX1200-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX1200-NEXT: $vgpr63 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
+    ; GFX1200-NEXT: $vgpr62 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
+    ; GFX1200-NEXT: $vgpr61 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
+    ; GFX1200-NEXT: $vgpr60 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
+    ; GFX1200-NEXT: $vgpr59 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
+    ; GFX1200-NEXT: $vgpr58 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
+    ; GFX1200-NEXT: $vgpr57 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
+    ; GFX1200-NEXT: $vgpr56 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
+    ; GFX1200-NEXT: $vgpr47 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GFX1200-NEXT: $vgpr46 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
+    ; GFX1200-NEXT: $vgpr45 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
+    ; GFX1200-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
+    ; GFX1200-NEXT: $vgpr43 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
+    ; GFX1200-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+    ; GFX1200-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; GFX1200-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; GFX1200-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc_lo
   S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
   S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
index ade7b4266e9e6..e7bc77b8676bd 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
@@ -1053,71 +1053,71 @@ body:             |
     ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
     ; GFX7: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
     ; GFX7-NEXT: {{  $}}
-    ; GFX7-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GFX7-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
     ; GFX7-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
     ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
     ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
-    ; GFX7-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX7-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; GFX7-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; GFX7-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
     ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
     ; GFX8-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
     ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
     ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
-    ; GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; GFX8-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
     ; GFX900: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
     ; GFX900-NEXT: {{  $}}
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
     ; GFX900-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
     ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
     ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
-    ; GFX900-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX900-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; GFX900-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
     ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
     ; GFX90A-NEXT: {{  $}}
-    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
     ; GFX90A-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
     ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
     ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
-    ; GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
     ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
     ; GFX10-NEXT: {{  $}}
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
     ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
     ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec
-    ; GFX10-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX10-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; GFX10-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; GFX10-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
     ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
     ; GFX942-NEXT: {{  $}}
-    ; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+    ; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
     ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
     ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
     ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
     ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
-    ; GFX942-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX942-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; GFX942-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; GFX942-NEXT: SI_RETURN implicit $vgpr0
     ;
@@ -1160,22 +1160,22 @@ body:             |
     ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required
     ; MUBUFW64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
     ; MUBUFW64-NEXT: {{  $}}
-    ; MUBUFW64-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; MUBUFW64-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
     ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
     ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec
-    ; MUBUFW64-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; MUBUFW64-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; MUBUFW64-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required
     ; FLATSCRW64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
     ; FLATSCRW64-NEXT: {{  $}}
-    ; FLATSCRW64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+    ; FLATSCRW64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
     ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
     ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
     ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec
-    ; FLATSCRW64-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+    ; FLATSCRW64-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; FLATSCRW64-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
     renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, %stack.1, implicit-def dead $vcc, implicit $exec
@@ -1202,11 +1202,11 @@ body:             |
     ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required
     ; MUBUFW64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
     ; MUBUFW64-NEXT: {{  $}}
-    ; MUBUFW64-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; MUBUFW64-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
     ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
     ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec
-    ; MUBUFW64-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; MUBUFW64-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; MUBUFW64-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
     ;
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index 81bd8baaa0e5d..56d65e0f91662 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -401,22 +401,22 @@ body:             |
     ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
     ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -426,12 +426,12 @@ body:             |
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
     ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX8-NEXT: $sgpr4 = S_MOV_B32 64
     ; GFX8-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
     ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+    ; GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -440,43 +440,43 @@ body:             |
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
-    ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
-    ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
-    ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
-    ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
-    ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
-    ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
-    ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
-    ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
-    ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
-    ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-    ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
+    ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
+    ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
+    ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
+    ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
+    ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
+    ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
+    ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
+    ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
+    ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
+    ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
+    ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+    ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
     ;
     ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
     ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX900-NEXT: {{  $}}
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -486,11 +486,11 @@ body:             |
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
     ; GFX900-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
     ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GFX900-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+    ; GFX900-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -499,22 +499,22 @@ body:             |
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
-    ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
-    ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
-    ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
-    ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
-    ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
-    ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
-    ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
-    ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
-    ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
-    ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-    ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
+    ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
+    ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
+    ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
+    ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
+    ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
+    ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
+    ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
+    ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
+    ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
+    ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
+    ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+    ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
     ;
     ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
@@ -545,10 +545,10 @@ body:             |
     ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
     ; GFX90A-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
+    ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -615,22 +615,22 @@ body:             |
     ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
     ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -640,12 +640,12 @@ body:             |
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.18, addrspace 5)
     ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX8-NEXT: $sgpr4 = S_MOV_B32 128
     ; GFX8-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
     ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+    ; GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.18, addrspace 5)
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -654,43 +654,43 @@ body:             |
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
-    ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
-    ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
-    ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
-    ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
-    ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
-    ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
-    ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
-    ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
-    ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
-    ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
-    ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
+    ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
+    ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
+    ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
+    ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
+    ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
+    ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
+    ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
+    ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
+    ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
+    ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
+    ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
+    ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+    ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
     ;
     ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
     ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX900-NEXT: {{  $}}
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -700,11 +700,11 @@ body:             |
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.18, addrspace 5)
     ; GFX900-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec
     ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GFX900-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+    ; GFX900-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.18, addrspace 5)
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -713,22 +713,22 @@ body:             |
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
-    ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
-    ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
-    ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
-    ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
-    ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
-    ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
-    ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
-    ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
-    ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
-    ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
-    ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
+    ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
+    ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
+    ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
+    ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
+    ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
+    ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
+    ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
+    ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
+    ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
+    ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
+    ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
+    ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+    ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
     ;
     ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
@@ -759,11 +759,11 @@ body:             |
     ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.18, addrspace 5)
     ; GFX90A-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
     ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
+    ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.18, addrspace 5)
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
index 58cd2f5bc11af..606609a5c6ae0 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
@@ -13,7 +13,7 @@ define fastcc i32 @foo() {
   ; CHECK-NEXT:   $sgpr16 = S_MOV_B32 $sgpr33
   ; CHECK-NEXT:   $sgpr33 = S_MOV_B32 $sgpr32
   ; CHECK-NEXT:   $sgpr17 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; CHECK-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr17
   ; CHECK-NEXT:   $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 512, implicit-def dead $scc
   ; CHECK-NEXT:   $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40
@@ -44,7 +44,7 @@ define fastcc i32 @foo() {
   ; CHECK-NEXT:   $sgpr32 = S_MOV_B32 $sgpr33
   ; CHECK-NEXT:   $sgpr4 = V_READLANE_B32 $vgpr40, 2
   ; CHECK-NEXT:   $sgpr5 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; CHECK-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; CHECK-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr5
   ; CHECK-NEXT:   $sgpr33 = S_MOV_B32 killed $sgpr4
   ; CHECK-NEXT:   S_WAITCNT 16240
diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir
index 86b6c5982b4cb..8a3ed8be536ba 100644
--- a/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir
+++ b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir
@@ -271,7 +271,7 @@ body:             |
   ; CHECK-NEXT:   liveins: $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $vgpr0, $vgpr1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
   ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
   ; CHECK-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40
   ; CHECK-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40
@@ -307,7 +307,7 @@ body:             |
   ; CHECK-NEXT:   $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 1
   ; CHECK-NEXT:   $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 0
   ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
   ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
   ; CHECK-NEXT:   SI_RETURN implicit $vgpr0
   bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir b/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir
index 7a913cf50ea2b..2337c9ebf7689 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir
+++ b/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir
@@ -36,7 +36,7 @@ body:             |
   ; CHECK-NEXT:   $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
   ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/memory-legalizer-buffer-atomics.ll b/llvm/test/CodeGen/AMDGPU/memory-legalizer-buffer-atomics.ll
index b7971a024cc38..15fb5e756d058 100644
--- a/llvm/test/CodeGen/AMDGPU/memory-legalizer-buffer-atomics.ll
+++ b/llvm/test/CodeGen/AMDGPU/memory-legalizer-buffer-atomics.ll
@@ -104,10 +104,10 @@ define float @struct_buffer_atomic_add_v2f16_ret(<2 x half> %val, <4 x i32> inre
 ; GFX1250:       ; %bb.0:
 ; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
 ; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    scratch_store_b32 off, v2, s32 ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v2, s32 nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    v_mov_b32_e32 v2, v1
-; GFX1250-NEXT:    scratch_load_b32 v1, off, s32 ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v1, off, s32 nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_mov_b32 s4, s3
 ; GFX1250-NEXT:    s_mov_b32 s5, s2
 ; GFX1250-NEXT:    s_mov_b32 s6, s1
@@ -133,10 +133,10 @@ define void @struct_buffer_atomic_add_v2f16_noret(<2 x half> %val, <4 x i32> inr
 ; GFX1250:       ; %bb.0:
 ; GFX1250-NEXT:    s_wait_loadcnt_dscnt 0x0
 ; GFX1250-NEXT:    s_wait_kmcnt 0x0
-; GFX1250-NEXT:    scratch_store_b32 off, v2, s32 ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v2, s32 nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    v_mov_b32_e32 v2, v1
-; GFX1250-NEXT:    scratch_load_b32 v1, off, s32 ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v1, off, s32 nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_mov_b32 s4, s3
 ; GFX1250-NEXT:    s_mov_b32 s5, s2
 ; GFX1250-NEXT:    s_mov_b32 s6, s1
diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
index 1573544a3db14..dccfe7e2d1656 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
@@ -40,7 +40,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
   ; PEI-GFX908-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7012362 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
   ; PEI-GFX908-NEXT:   renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
   ; PEI-GFX908-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2818058 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
-  ; PEI-GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
+  ; PEI-GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
   ; PEI-GFX908-NEXT:   $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1
   ; PEI-GFX908-NEXT:   renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec
   ; PEI-GFX908-NEXT:   GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
@@ -49,7 +49,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
   ; PEI-GFX908-NEXT:   renamable $vgpr0 = AV_MOV_B32_IMM_PSEUDO 1, implicit $exec
   ; PEI-GFX908-NEXT:   renamable $vgpr1 = AV_MOV_B32_IMM_PSEUDO 2, implicit $exec
   ; PEI-GFX908-NEXT:   renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 killed $vgpr0, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
-  ; PEI-GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5)
+  ; PEI-GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
   ; PEI-GFX908-NEXT:   $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1
   ; PEI-GFX908-NEXT:   GLOBAL_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) poison`, addrspace 1)
   ; PEI-GFX908-NEXT:   renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir
index bb248fe0444db..e5d28438ddc8a 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir
@@ -36,12 +36,12 @@ body:             |
     ; GCN-LABEL: name: preserve_active_lanes_above_args
     ; GCN: liveins: $sgpr0, $vgpr8, $vgpr9, $vgpr10
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST killed $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST killed $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec
     ; GCN-NEXT: $vgpr8 = COPY killed renamable $vgpr10
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
-    ; GCN-NEXT: $vgpr10 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr10 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
     renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec
     $vgpr8 = COPY renamable killed $vgpr10
@@ -70,7 +70,7 @@ body:             |
     ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
     ; GCN-NEXT: $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10
@@ -81,7 +81,7 @@ body:             |
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
     ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr10 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr10 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
     ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
     S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
@@ -144,7 +144,7 @@ body:             |
     ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
     ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
@@ -154,7 +154,7 @@ body:             |
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
     ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr9(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr9(tied-def 0) :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
     ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
     S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir
index 4aea915936ffc..f5f640577e7ca 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir
@@ -38,15 +38,15 @@ body:             |
     ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
     ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
     ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_ST 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_ST 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
     ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
     renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir
index 4b4e9f1d81ec6..71d3c77bfe203 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir
@@ -21,8 +21,8 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v1
     ; MUBUF: $vgpr0 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v1
@@ -35,8 +35,8 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v1
     ; FLATSCR: $vgpr0 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v1
@@ -49,8 +49,8 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v1
     ; MUBUF-GFX90A: $vgpr0 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v1
@@ -63,8 +63,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v1
     ; FLATSCR-GFX90A: $vgpr0 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v1
@@ -94,10 +94,10 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v2
     ; MUBUF: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v2
@@ -112,8 +112,8 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v2
     ; FLATSCR: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v2
@@ -128,10 +128,10 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v2
     ; MUBUF-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v2
@@ -146,8 +146,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v2
     ; FLATSCR-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v2
@@ -179,12 +179,12 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v3
     ; MUBUF: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v3
@@ -201,8 +201,8 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v3
     ; FLATSCR: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v3
@@ -219,12 +219,12 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v3
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v3
@@ -241,8 +241,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v3
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v3
@@ -276,14 +276,14 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v4
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v4
@@ -302,8 +302,8 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v4
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v4
@@ -322,14 +322,14 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v4
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v4
@@ -348,8 +348,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v4
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v4
@@ -385,16 +385,16 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v5
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v5
@@ -415,10 +415,10 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v5
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v5
@@ -439,16 +439,16 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v5
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v5
@@ -469,10 +469,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v5
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v5
@@ -510,18 +510,18 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v6
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v6
@@ -544,10 +544,10 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v6
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s64) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v6
@@ -570,18 +570,18 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v6
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v6
@@ -604,10 +604,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v6
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s64) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v6
@@ -647,20 +647,20 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v7
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v7
@@ -685,10 +685,10 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v7
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr4_vgpr5_vgpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s96) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr4_vgpr5_vgpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" store (s96) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v7
@@ -713,20 +713,20 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v7
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v7
@@ -751,10 +751,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v7
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr4_vgpr5_vgpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (store (s96) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr4_vgpr5_vgpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" store (s96) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v7
@@ -796,22 +796,22 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v8
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v8
@@ -838,10 +838,10 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v8
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v8
@@ -868,22 +868,22 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v8
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v8
@@ -910,10 +910,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v8
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v8
@@ -957,38 +957,38 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v16
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v16
@@ -1031,14 +1031,14 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v16
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v16
@@ -1081,38 +1081,38 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v16
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v16
@@ -1155,14 +1155,14 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v16
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v16
@@ -1222,70 +1222,70 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v32
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 64, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 68, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 72, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 76, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 80, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 84, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 88, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 92, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 96, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 100, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 104, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 108, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 112, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 116, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 120, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 124, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5)
-    ; MUBUF-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 64, addrspace 5)
-    ; MUBUF-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 68, addrspace 5)
-    ; MUBUF-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 72, addrspace 5)
-    ; MUBUF-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 76, addrspace 5)
-    ; MUBUF-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 80, addrspace 5)
-    ; MUBUF-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 84, addrspace 5)
-    ; MUBUF-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 88, addrspace 5)
-    ; MUBUF-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 92, addrspace 5)
-    ; MUBUF-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 96, addrspace 5)
-    ; MUBUF-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 100, addrspace 5)
-    ; MUBUF-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 104, addrspace 5)
-    ; MUBUF-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 108, addrspace 5)
-    ; MUBUF-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 112, addrspace 5)
-    ; MUBUF-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 116, addrspace 5)
-    ; MUBUF-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 120, addrspace 5)
-    ; MUBUF-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 124, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
+    ; MUBUF-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
+    ; MUBUF-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
+    ; MUBUF-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
+    ; MUBUF-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
+    ; MUBUF-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
+    ; MUBUF-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
+    ; MUBUF-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
+    ; MUBUF-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
+    ; MUBUF-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
+    ; MUBUF-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
+    ; MUBUF-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
+    ; MUBUF-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
+    ; MUBUF-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
+    ; MUBUF-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
+    ; MUBUF-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v32
@@ -1360,22 +1360,22 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v32
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 112, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 112, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v32
@@ -1450,70 +1450,70 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v32
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 64, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 68, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 72, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 76, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 80, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 84, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 88, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 92, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 96, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 100, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 104, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 108, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 112, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 116, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 120, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 124, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 64, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 68, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 72, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 76, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 80, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 84, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 88, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 92, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 96, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 100, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 104, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 108, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 112, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 116, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 120, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 124, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v32
@@ -1588,22 +1588,22 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v32
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 112, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 112, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v32
@@ -1696,8 +1696,8 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a1
     ; MUBUF: $agpr0 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -1712,8 +1712,8 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a1
     ; FLATSCR: $agpr0 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -1727,8 +1727,8 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a1
     ; MUBUF-GFX90A: $agpr0 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a1
@@ -1741,8 +1741,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a1
     ; FLATSCR-GFX90A: $agpr0 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a1
@@ -1773,12 +1773,12 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a2
     ; MUBUF: $agpr0_agpr1 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -1795,12 +1795,12 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a2
     ; FLATSCR: $agpr0_agpr1 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -1816,10 +1816,10 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a2
     ; MUBUF-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a2
@@ -1834,8 +1834,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a2
     ; FLATSCR-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a2
@@ -1868,16 +1868,16 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a3
     ; MUBUF: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -1896,16 +1896,16 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a3
     ; FLATSCR: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -1923,12 +1923,12 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a3
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a3
@@ -1945,8 +1945,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a3
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a3
@@ -1981,20 +1981,20 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a4
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2015,20 +2015,20 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a4
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2048,14 +2048,14 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a4
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a4
@@ -2074,8 +2074,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a4
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a4
@@ -2112,24 +2112,24 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a5
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2152,24 +2152,24 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a5
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2191,16 +2191,16 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a5
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a5
@@ -2221,10 +2221,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a5
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a5
@@ -2263,28 +2263,28 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a6
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2309,28 +2309,28 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a6
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2354,18 +2354,18 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a6
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a6
@@ -2388,10 +2388,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a6
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr4_agpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr4_agpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s64) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a6
@@ -2432,32 +2432,32 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a7
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2484,32 +2484,32 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a7
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2535,20 +2535,20 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a7
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a7
@@ -2573,10 +2573,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a7
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr4_agpr5_agpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (store (s96) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr4_agpr5_agpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" store (s96) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a7
@@ -2619,36 +2619,36 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a8
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2677,36 +2677,36 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a8
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2734,22 +2734,22 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a8
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a8
@@ -2776,10 +2776,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a8
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a8
@@ -2824,40 +2824,40 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a9
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2888,40 +2888,40 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a9
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2951,24 +2951,24 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a9
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a9
@@ -2997,12 +2997,12 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a9
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr8, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr8, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a9
@@ -3049,44 +3049,44 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a10
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -3119,44 +3119,44 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a10
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -3188,26 +3188,26 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a10
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a10
@@ -3238,12 +3238,12 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a10
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr8_agpr9, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (store (s64) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr8_agpr9, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" store (s64) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0 + 32, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a10
@@ -3292,48 +3292,48 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a11
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -3368,48 +3368,48 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a11
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s32) into %stack.0 + 40, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -3443,28 +3443,28 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a11
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a11
@@ -3497,12 +3497,12 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a11
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr8_agpr9_agpr10, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (store (s96) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr8_agpr9_agpr10, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" store (s96) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0 + 32, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a11
@@ -3553,52 +3553,52 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a12
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -3635,52 +3635,52 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a12
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (store (s32) into %stack.0 + 44, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -3716,30 +3716,30 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a12
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a12
@@ -3774,12 +3774,12 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a12
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a12
@@ -3832,68 +3832,68 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a16
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
     ; MUBUF-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
     ; MUBUF-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
     ; MUBUF-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -3938,68 +3938,68 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a16
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 48, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 52, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 56, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 60, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 48, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
     ; FLATSCR-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 52, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
     ; FLATSCR-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 56, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
     ; FLATSCR-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 60, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
     ; FLATSCR-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -4043,38 +4043,38 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a16
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a16
@@ -4117,14 +4117,14 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a16
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a16
@@ -4185,132 +4185,132 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a32
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 64, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 68, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 72, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 76, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 80, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 84, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 88, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 92, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 96, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 100, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 104, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 108, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 112, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 116, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 120, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 124, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
     ; MUBUF-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
     ; MUBUF-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
     ; MUBUF-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 64, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
     ; MUBUF-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 68, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
     ; MUBUF-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 72, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
     ; MUBUF-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 76, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
     ; MUBUF-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 80, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
     ; MUBUF-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 84, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
     ; MUBUF-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 88, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
     ; MUBUF-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 92, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
     ; MUBUF-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 96, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
     ; MUBUF-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 100, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
     ; MUBUF-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 104, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
     ; MUBUF-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 108, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
     ; MUBUF-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 112, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
     ; MUBUF-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 116, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
     ; MUBUF-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 120, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
     ; MUBUF-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 124, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
     ; MUBUF-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -4387,132 +4387,132 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a32
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 48, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 52, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 56, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 60, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 64, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 68, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 72, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 76, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 80, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 84, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 88, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 92, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 96, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 100, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 104, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 108, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 112, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 116, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 120, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 124, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 48, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
     ; FLATSCR-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 52, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
     ; FLATSCR-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 56, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
     ; FLATSCR-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 60, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
     ; FLATSCR-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 64, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
     ; FLATSCR-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 68, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
     ; FLATSCR-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 72, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
     ; FLATSCR-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 76, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
     ; FLATSCR-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 80, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
     ; FLATSCR-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 84, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
     ; FLATSCR-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 88, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
     ; FLATSCR-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 92, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
     ; FLATSCR-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 96, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
     ; FLATSCR-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 100, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
     ; FLATSCR-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 104, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
     ; FLATSCR-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 108, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
     ; FLATSCR-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 112, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
     ; FLATSCR-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 116, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
     ; FLATSCR-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 120, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
     ; FLATSCR-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 124, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
     ; FLATSCR-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -4588,70 +4588,70 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a32
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 64, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 68, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 72, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 76, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 80, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 84, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 88, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 92, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 96, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 100, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 104, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 108, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 112, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 116, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 120, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 124, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 64, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 68, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 72, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 76, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 80, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 84, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 88, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 92, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 96, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 100, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 104, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 108, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 112, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 116, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 120, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 124, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a32
@@ -4726,22 +4726,22 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a32
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr16_agpr17_agpr18_agpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr20_agpr21_agpr22_agpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr24_agpr25_agpr26_agpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr28_agpr29_agpr30_agpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0 + 112, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr16_agpr17_agpr18_agpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr20_agpr21_agpr22_agpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr24_agpr25_agpr26_agpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr28_agpr29_agpr30_agpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr16_agpr17_agpr18_agpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr20_agpr21_agpr22_agpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr24_agpr25_agpr26_agpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr28_agpr29_agpr30_agpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr16_agpr17_agpr18_agpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr20_agpr21_agpr22_agpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr24_agpr25_agpr26_agpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr28_agpr29_agpr30_agpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 112, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a32
diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
index c9208bfa15c63..5d59878a04ba5 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
@@ -60,9 +60,9 @@ body:             |
     ; MUBUF-V2A: liveins: $agpr0
     ; MUBUF-V2A-NEXT: {{  $}}
     ; MUBUF-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1
-    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1
     ; MUBUF-V2A-NEXT: S_ENDPGM 0
     ;
@@ -71,9 +71,9 @@ body:             |
     ; FLATSCR-V2A-NEXT: {{  $}}
     ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF
     ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1
-    ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-V2A-NEXT: S_ENDPGM 0
     $vgpr0_vgpr1 = IMPLICIT_DEF
     SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
@@ -97,11 +97,11 @@ body:             |
     ; MUBUF-V2A: liveins: $agpr0
     ; MUBUF-V2A-NEXT: {{  $}}
     ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2
-    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2
     ; MUBUF-V2A-NEXT: S_ENDPGM 0
     ;
@@ -110,9 +110,9 @@ body:             |
     ; FLATSCR-V2A-NEXT: {{  $}}
     ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
     ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s64) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2
-    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (load (s64) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: S_ENDPGM 0
     $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
     SI_SPILL_V96_SAVE killed $vgpr0_vgpr1_vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5)
@@ -136,11 +136,11 @@ body:             |
     ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2
     ; MUBUF-V2A-NEXT: {{  $}}
     ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
-    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
@@ -153,11 +153,11 @@ body:             |
     ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
     ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-V2A-NEXT: S_ENDPGM 0
     $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5)
@@ -181,13 +181,13 @@ body:             |
     ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2
     ; MUBUF-V2A-NEXT: {{  $}}
     ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
-    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
@@ -199,11 +199,11 @@ body:             |
     ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
     ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
     ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s64) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
     ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
     ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
-    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s64) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
     ; FLATSCR-V2A-NEXT: S_ENDPGM 0
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
@@ -228,13 +228,13 @@ body:             |
     ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
     ; MUBUF-V2A-NEXT: {{  $}}
     ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
-    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
@@ -249,13 +249,13 @@ body:             |
     ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
-    ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     ; FLATSCR-V2A-NEXT: S_ENDPGM 0
@@ -281,18 +281,18 @@ body:             |
     ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3
     ; MUBUF-V2A-NEXT: {{  $}}
     ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
@@ -303,12 +303,12 @@ body:             |
     ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3
     ; FLATSCR-V2A-NEXT: {{  $}}
     ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
@@ -336,33 +336,33 @@ body:             |
     ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
     ; MUBUF-V2A-NEXT: {{  $}}
     ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
     ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
     ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
@@ -374,18 +374,18 @@ body:             |
     ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
     ; FLATSCR-V2A-NEXT: {{  $}}
     ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr8_vgpr9_vgpr10, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr8_vgpr9_vgpr10, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.0 + 32, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-V2A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
-    ; FLATSCR-V2A-NEXT: $vgpr8_vgpr9_vgpr10 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr8_vgpr9_vgpr10 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0 + 32, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; FLATSCR-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir
index 2fbe08300af57..c807342bb3951 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir
@@ -21,8 +21,8 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v1
     ; MUBUF: $vgpr0 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v1
@@ -35,8 +35,8 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v1
     ; FLATSCR: $vgpr0 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v1
@@ -49,8 +49,8 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v1
     ; MUBUF-GFX90A: $vgpr0 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v1
@@ -63,8 +63,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v1
     ; FLATSCR-GFX90A: $vgpr0 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v1
@@ -94,10 +94,10 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v2
     ; MUBUF: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v2
@@ -112,8 +112,8 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v2
     ; FLATSCR: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v2
@@ -128,10 +128,10 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v2
     ; MUBUF-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v2
@@ -146,8 +146,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v2
     ; FLATSCR-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v2
@@ -179,12 +179,12 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v3
     ; MUBUF: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v3
@@ -201,8 +201,8 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v3
     ; FLATSCR: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v3
@@ -219,12 +219,12 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v3
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v3
@@ -241,8 +241,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v3
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v3
@@ -276,14 +276,14 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v4
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v4
@@ -302,8 +302,8 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v4
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v4
@@ -322,14 +322,14 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v4
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v4
@@ -348,8 +348,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v4
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v4
@@ -385,16 +385,16 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v5
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v5
@@ -415,10 +415,10 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v5
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v5
@@ -439,16 +439,16 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v5
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v5
@@ -469,10 +469,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v5
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v5
@@ -510,18 +510,18 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v6
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v6
@@ -544,10 +544,10 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v6
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s64) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v6
@@ -570,18 +570,18 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v6
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v6
@@ -604,10 +604,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v6
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s64) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v6
@@ -647,22 +647,22 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v8
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v8
@@ -689,10 +689,10 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v8
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v8
@@ -719,22 +719,22 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v8
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v8
@@ -761,10 +761,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v8
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v8
@@ -808,38 +808,38 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v16
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v16
@@ -882,14 +882,14 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v16
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v16
@@ -932,38 +932,38 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v16
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v16
@@ -1006,14 +1006,14 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v16
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v16
@@ -1073,70 +1073,70 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v32
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 64, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 68, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 72, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 76, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 80, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 84, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 88, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 92, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 96, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 100, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 104, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 108, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 112, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 116, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 120, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 124, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5)
-    ; MUBUF-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 64, addrspace 5)
-    ; MUBUF-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 68, addrspace 5)
-    ; MUBUF-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 72, addrspace 5)
-    ; MUBUF-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 76, addrspace 5)
-    ; MUBUF-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 80, addrspace 5)
-    ; MUBUF-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 84, addrspace 5)
-    ; MUBUF-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 88, addrspace 5)
-    ; MUBUF-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 92, addrspace 5)
-    ; MUBUF-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 96, addrspace 5)
-    ; MUBUF-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 100, addrspace 5)
-    ; MUBUF-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 104, addrspace 5)
-    ; MUBUF-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 108, addrspace 5)
-    ; MUBUF-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 112, addrspace 5)
-    ; MUBUF-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 116, addrspace 5)
-    ; MUBUF-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 120, addrspace 5)
-    ; MUBUF-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 124, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
+    ; MUBUF-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
+    ; MUBUF-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
+    ; MUBUF-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
+    ; MUBUF-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
+    ; MUBUF-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
+    ; MUBUF-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
+    ; MUBUF-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
+    ; MUBUF-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
+    ; MUBUF-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
+    ; MUBUF-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
+    ; MUBUF-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
+    ; MUBUF-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
+    ; MUBUF-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
+    ; MUBUF-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
+    ; MUBUF-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v32
@@ -1211,22 +1211,22 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v32
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 112, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 112, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v32
@@ -1301,70 +1301,70 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v32
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 64, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 68, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 72, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 76, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 80, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 84, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 88, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 92, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 96, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 100, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 104, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 108, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 112, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 116, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 120, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s32) into %stack.0 + 124, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 64, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 68, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 72, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 76, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 80, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 84, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 88, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 92, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 96, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 100, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 104, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 108, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 112, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 116, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 120, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 124, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v32
@@ -1439,22 +1439,22 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v32
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (store (s128) into %stack.0 + 112, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 112, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v32
@@ -1547,8 +1547,8 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a1
     ; MUBUF: $agpr0 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -1563,8 +1563,8 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a1
     ; FLATSCR: $agpr0 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -1578,8 +1578,8 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a1
     ; MUBUF-GFX90A: $agpr0 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a1
@@ -1592,8 +1592,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a1
     ; FLATSCR-GFX90A: $agpr0 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a1
@@ -1624,12 +1624,12 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a2
     ; MUBUF: $agpr0_agpr1 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -1646,12 +1646,12 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a2
     ; FLATSCR: $agpr0_agpr1 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -1667,10 +1667,10 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a2
     ; MUBUF-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a2
@@ -1685,8 +1685,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a2
     ; FLATSCR-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a2
@@ -1719,16 +1719,16 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a3
     ; MUBUF: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -1747,16 +1747,16 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a3
     ; FLATSCR: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -1774,12 +1774,12 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a3
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a3
@@ -1796,8 +1796,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a3
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a3
@@ -1832,20 +1832,20 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a4
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -1866,20 +1866,20 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a4
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -1899,14 +1899,14 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a4
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a4
@@ -1925,8 +1925,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a4
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a4
@@ -1963,24 +1963,24 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a5
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2003,24 +2003,24 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a5
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2042,16 +2042,16 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a5
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a5
@@ -2072,10 +2072,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a5
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a5
@@ -2114,28 +2114,28 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a6
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2160,28 +2160,28 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a6
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2205,18 +2205,18 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a6
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a6
@@ -2239,10 +2239,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a6
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr4_agpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (store (s64) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr4_agpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s64) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a6
@@ -2283,36 +2283,36 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a8
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2341,36 +2341,36 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a8
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2398,22 +2398,22 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a8
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a8
@@ -2440,10 +2440,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a8
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a8
@@ -2488,68 +2488,68 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a16
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
     ; MUBUF-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
     ; MUBUF-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
     ; MUBUF-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2594,68 +2594,68 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a16
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 48, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 52, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 56, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 60, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 48, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
     ; FLATSCR-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 52, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
     ; FLATSCR-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 56, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
     ; FLATSCR-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 60, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
     ; FLATSCR-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2699,38 +2699,38 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a16
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a16
@@ -2773,14 +2773,14 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a16
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a16
@@ -2841,132 +2841,132 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a32
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 64, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 68, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 72, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 76, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 80, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 84, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 88, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 92, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 96, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 100, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 104, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 108, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 112, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 116, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 120, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 124, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
     ; MUBUF-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
     ; MUBUF-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
     ; MUBUF-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 64, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
     ; MUBUF-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 68, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
     ; MUBUF-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 72, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
     ; MUBUF-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 76, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
     ; MUBUF-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 80, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
     ; MUBUF-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 84, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
     ; MUBUF-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 88, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
     ; MUBUF-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 92, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
     ; MUBUF-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 96, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
     ; MUBUF-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 100, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
     ; MUBUF-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 104, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
     ; MUBUF-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 108, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
     ; MUBUF-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 112, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
     ; MUBUF-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 116, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
     ; MUBUF-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 120, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
     ; MUBUF-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 124, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
     ; MUBUF-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -3043,132 +3043,132 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a32
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 48, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 52, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 56, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 60, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 64, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 68, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 72, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 76, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 80, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 84, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 88, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 92, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 96, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 100, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 104, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 108, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 112, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 116, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0 + 120, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 124, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 48, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
     ; FLATSCR-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 52, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
     ; FLATSCR-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 56, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
     ; FLATSCR-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 60, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
     ; FLATSCR-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 64, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
     ; FLATSCR-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 68, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
     ; FLATSCR-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 72, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
     ; FLATSCR-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 76, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
     ; FLATSCR-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 80, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
     ; FLATSCR-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 84, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
     ; FLATSCR-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 88, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
     ; FLATSCR-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 92, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
     ; FLATSCR-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 96, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
     ; FLATSCR-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 100, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
     ; FLATSCR-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 104, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
     ; FLATSCR-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 108, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
     ; FLATSCR-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 112, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
     ; FLATSCR-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 116, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
     ; FLATSCR-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 120, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
     ; FLATSCR-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0 + 124, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
     ; FLATSCR-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -3244,70 +3244,70 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a32
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 64, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 68, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 72, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 76, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 80, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 84, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 88, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 92, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 96, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 100, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 104, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 108, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 112, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 116, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 120, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s32) into %stack.0 + 124, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 64, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 68, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 72, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 76, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 80, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 84, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 88, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 92, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 96, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 100, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 104, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 108, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 112, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 116, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 120, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: (load (s32) from %stack.0 + 124, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a32
@@ -3382,22 +3382,22 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a32
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr16_agpr17_agpr18_agpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr20_agpr21_agpr22_agpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr24_agpr25_agpr26_agpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr28_agpr29_agpr30_agpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (store (s128) into %stack.0 + 112, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: (load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr16_agpr17_agpr18_agpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr20_agpr21_agpr22_agpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr24_agpr25_agpr26_agpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr28_agpr29_agpr30_agpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr16_agpr17_agpr18_agpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr20_agpr21_agpr22_agpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr24_agpr25_agpr26_agpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr28_agpr29_agpr30_agpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr16_agpr17_agpr18_agpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr20_agpr21_agpr22_agpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr24_agpr25_agpr26_agpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr28_agpr29_agpr30_agpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 112, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a32
diff --git a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
index aa4428f3da4eb..374dd2fea0270 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
@@ -30,14 +30,14 @@ body:             |
   ; CHECK-NEXT:   $sgpr0 = S_ADD_U32 $sgpr0, $sgpr4, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT:   $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT:   $sgpr4 = S_MOV_B32 524288
-  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 8192, addrspace 5)
+  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, align 8192, addrspace 5)
   ; CHECK-NEXT:   S_BRANCH %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   liveins: $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr4 = S_MOV_B32 524288
-  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, align 8192, addrspace 5)
+  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, align 8192, addrspace 5)
   ; CHECK-NEXT:   S_ENDPGM 0, implicit $vgpr0
   bb.0:
     $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
index 63a4759d8e740..8188d51e205f6 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
@@ -24,14 +24,14 @@ body:             |
     liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
 
     ; GFX8-LABEL: name: pei_scavenge_vgpr_spill
-    ; GFX8: liveins: $vgpr2, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+    ; GFX8: liveins: $vgpr2, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr2
     ; GFX8-NEXT: {{  $}}
     ; GFX8-NEXT: $sgpr4 = COPY $sgpr33
     ; GFX8-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
     ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
     ; GFX8-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GFX8-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
     ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
     ; GFX8-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
     ; GFX8-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2
@@ -49,20 +49,20 @@ body:             |
     ; GFX8-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
     ; GFX8-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GFX8-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
-    ; GFX8-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+    ; GFX8-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
     ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
     ; GFX8-NEXT: $sgpr33 = COPY $sgpr4
     ; GFX8-NEXT: S_ENDPGM 0, amdgpu_allvgprs
     ;
     ; GFX9-LABEL: name: pei_scavenge_vgpr_spill
-    ; GFX9: liveins: $vgpr2, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+    ; GFX9: liveins: $vgpr2, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr2
     ; GFX9-NEXT: {{  $}}
     ; GFX9-NEXT: $sgpr4 = COPY $sgpr33
     ; GFX9-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 524224, implicit-def $scc
     ; GFX9-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
     ; GFX9-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GFX9-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
     ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
     ; GFX9-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
     ; GFX9-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2
@@ -78,20 +78,20 @@ body:             |
     ; GFX9-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
     ; GFX9-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GFX9-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
-    ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+    ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
     ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
     ; GFX9-NEXT: $sgpr33 = COPY $sgpr4
     ; GFX9-NEXT: S_ENDPGM 0, amdgpu_allvgprs
     ;
     ; GFX9-FLATSCR-LABEL: name: pei_scavenge_vgpr_spill
-    ; GFX9-FLATSCR: liveins: $vgpr2, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
+    ; GFX9-FLATSCR: liveins: $vgpr2, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr2
     ; GFX9-FLATSCR-NEXT: {{  $}}
     ; GFX9-FLATSCR-NEXT: $sgpr4 = COPY $sgpr33
     ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_ADD_I32 $sgpr32, 8191, implicit-def $scc
     ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
     ; GFX9-FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GFX9-FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc
-    ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
+    ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
     ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
     ; GFX9-FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
     ; GFX9-FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2
@@ -106,7 +106,7 @@ body:             |
     ; GFX9-FLATSCR-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
     ; GFX9-FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GFX9-FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc
-    ; GFX9-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
+    ; GFX9-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
     ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
     ; GFX9-FLATSCR-NEXT: $sgpr33 = COPY $sgpr4
     ; GFX9-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs
diff --git a/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir b/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir
index bfca9331a5d25..a49e509904533 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir
@@ -27,10 +27,10 @@ body: |
     ; CHECK: liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $m0 = S_MOV_B32 9
-    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45
     ; CHECK-NEXT: $m0 = S_MOV_B32 9
-    ; CHECK-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: ("amdgpu-non-volatile" load (s1024) from %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
     S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45
     S_SETPC_B64_return $sgpr30_sgpr31
@@ -51,10 +51,10 @@ body: |
     ; CHECK: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $m0 = S_MOV_B32 16711935
-    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def $vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit-def $vgpr65, implicit-def $vgpr66
     ; CHECK-NEXT: $m0 = S_MOV_B32 16711935
-    ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (load (s1024) from %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" load (s1024) from %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
     S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def $vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit-def $vgpr65, implicit-def $vgpr66
     S_SETPC_B64_return $sgpr30_sgpr31
@@ -79,18 +79,18 @@ body: |
     ; CHECK: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $m0 = S_MOV_B32 3
-    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: $m0 = S_MOV_B32 65
-    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.1, align 4, addrspace 5)
     ; CHECK-NEXT: $m0 = S_MOV_B32 1
-    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.2, align 4, addrspace 5)
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr104, implicit-def $vgpr110, implicit-def $vgpr232
     ; CHECK-NEXT: $m0 = S_MOV_B32 1
-    ; CHECK-NEXT: $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr233, implicit $vgpr234, implicit $vgpr235, implicit $vgpr236, implicit $vgpr237, implicit $vgpr238, implicit $vgpr239, implicit $vgpr248, implicit $vgpr249, implicit $vgpr250, implicit $vgpr251, implicit $vgpr252, implicit $vgpr253, implicit $vgpr254, implicit $vgpr255 :: (load (s1024) from %stack.2, align 4, addrspace 5)
+    ; CHECK-NEXT: $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr233, implicit $vgpr234, implicit $vgpr235, implicit $vgpr236, implicit $vgpr237, implicit $vgpr238, implicit $vgpr239, implicit $vgpr248, implicit $vgpr249, implicit $vgpr250, implicit $vgpr251, implicit $vgpr252, implicit $vgpr253, implicit $vgpr254, implicit $vgpr255 :: ("amdgpu-non-volatile" load (s1024) from %stack.2, align 4, addrspace 5)
     ; CHECK-NEXT: $m0 = S_MOV_B32 65
-    ; CHECK-NEXT: $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr105, implicit $vgpr106, implicit $vgpr107, implicit $vgpr108, implicit $vgpr109, implicit $vgpr111, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+    ; CHECK-NEXT: $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr105, implicit $vgpr106, implicit $vgpr107, implicit $vgpr108, implicit $vgpr109, implicit $vgpr111, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127 :: ("amdgpu-non-volatile" load (s1024) from %stack.1, align 4, addrspace 5)
     ; CHECK-NEXT: $m0 = S_MOV_B32 3
-    ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: ("amdgpu-non-volatile" load (s1024) from %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
     S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr104, implicit-def $vgpr110, implicit-def $vgpr232
     S_SETPC_B64_return $sgpr30_sgpr31
@@ -113,14 +113,14 @@ body: |
     ; CHECK: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $m0 = S_MOV_B32 7
-    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: $m0 = S_MOV_B32 3
-    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.1, align 4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.1, align 4, addrspace 5)
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42, implicit-def $vgpr70_vgpr71_vgpr72_vgpr73
     ; CHECK-NEXT: $m0 = S_MOV_B32 3
-    ; CHECK-NEXT: $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr74, implicit $vgpr75, implicit $vgpr76, implicit $vgpr77, implicit $vgpr78, implicit $vgpr79, implicit $vgpr88, implicit $vgpr89, implicit $vgpr90, implicit $vgpr91, implicit $vgpr92, implicit $vgpr93, implicit $vgpr94, implicit $vgpr95 :: (load (s1024) from %stack.1, align 4, addrspace 5)
+    ; CHECK-NEXT: $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr74, implicit $vgpr75, implicit $vgpr76, implicit $vgpr77, implicit $vgpr78, implicit $vgpr79, implicit $vgpr88, implicit $vgpr89, implicit $vgpr90, implicit $vgpr91, implicit $vgpr92, implicit $vgpr93, implicit $vgpr94, implicit $vgpr95 :: ("amdgpu-non-volatile" load (s1024) from %stack.1, align 4, addrspace 5)
     ; CHECK-NEXT: $m0 = S_MOV_B32 7
-    ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: ("amdgpu-non-volatile" load (s1024) from %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
     S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42, implicit-def $vgpr70_vgpr71_vgpr72_vgpr73
     S_SETPC_B64_return $sgpr30_sgpr31
@@ -149,12 +149,12 @@ body: |
     ; CHECK: liveins: $vgpr48, $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $m0 = S_MOV_B32 1
-    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.2, align 4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.2, align 4, addrspace 5)
     ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40
     ; CHECK-NEXT: $m0 = S_MOV_B32 1
-    ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.2, align 4, addrspace 5)
+    ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: ("amdgpu-non-volatile" load (s1024) from %stack.2, align 4, addrspace 5)
     ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
     SCRATCH_STORE_DWORD_SADDR $vgpr48, %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
     SCRATCH_STORE_DWORD_SADDR $vgpr48, %stack.1, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
@@ -183,22 +183,22 @@ body: |
     ; W32: liveins: $sgpr48, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71
     ; W32-NEXT: {{  $}}
     ; W32-NEXT: $sgpr0 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-    ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
-    ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
+    ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
     ; W32-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0
     ; W32-NEXT: $m0 = S_MOV_B32 9
-    ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.4, align 4, addrspace 5)
+    ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.4, align 4, addrspace 5)
     ; W32-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr48, 0, $vgpr44
     ; W32-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec
     ; W32-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40
     ; W32-NEXT: $sgpr48 = SI_RESTORE_S32_FROM_VGPR $vgpr44, 0
     ; W32-NEXT: $m0 = S_MOV_B32 9
-    ; W32-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.4, align 4, addrspace 5)
+    ; W32-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: ("amdgpu-non-volatile" load (s1024) from %stack.4, align 4, addrspace 5)
     ; W32-NEXT: $sgpr0 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; W32-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
-    ; W32-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
-    ; W32-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
+    ; W32-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; W32-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; W32-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
     ; W32-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0
     ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
     ;
@@ -206,22 +206,22 @@ body: |
     ; W64: liveins: $sgpr48, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71
     ; W64-NEXT: {{  $}}
     ; W64-NEXT: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-    ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
-    ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
+    ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
     ; W64-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
     ; W64-NEXT: $m0 = S_MOV_B32 9
-    ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.4, align 4, addrspace 5)
+    ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.4, align 4, addrspace 5)
     ; W64-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr48, 0, $vgpr44
     ; W64-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec
     ; W64-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40
     ; W64-NEXT: $sgpr48 = SI_RESTORE_S32_FROM_VGPR $vgpr44, 0
     ; W64-NEXT: $m0 = S_MOV_B32 9
-    ; W64-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: (load (s1024) from %stack.4, align 4, addrspace 5)
+    ; W64-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: ("amdgpu-non-volatile" load (s1024) from %stack.4, align 4, addrspace 5)
     ; W64-NEXT: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; W64-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
-    ; W64-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
-    ; W64-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
+    ; W64-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; W64-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; W64-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
     ; W64-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
     ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
     S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec
@@ -261,7 +261,7 @@ body: |
   ; CHECK-NEXT:   liveins: $vgpr44, $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $m0 = S_MOV_B32 11
-  ; CHECK-NEXT:   SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: (store (s1024) into %stack.0, align 4, addrspace 5)
+  ; CHECK-NEXT:   SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.0, align 4, addrspace 5)
   ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45
   ; CHECK-NEXT:   S_BRANCH %bb.1
   ; CHECK-NEXT: {{  $}}
@@ -276,7 +276,7 @@ body: |
   ; CHECK-NEXT:   liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $m0 = S_MOV_B32 11
-  ; CHECK-NEXT:   $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: (load (s1024) from %stack.0, align 4, addrspace 5)
+  ; CHECK-NEXT:   $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: ("amdgpu-non-volatile" load (s1024) from %stack.0, align 4, addrspace 5)
   ; CHECK-NEXT:   S_SETPC_B64_return $sgpr30_sgpr31
   bb.0:
     liveins: $sgpr30_sgpr31, $vgpr44
diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir b/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir
index e4cbae66d47fa..8a6813039f670 100644
--- a/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir
@@ -39,7 +39,7 @@ body:             |
     ; GFX908-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr7, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX908-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec
     ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec
     ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec
@@ -148,74 +148,74 @@ body:             |
     ; GFX908-NEXT: $vgpr35 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
     ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr35, implicit $exec, implicit $exec
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 12, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr8, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr9, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 20, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr10, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 24, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr11, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 28, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr12, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 32, 0, 0, implicit $exec :: (store (s32) into %stack.8, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr13, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 36, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 40, 0, 0, implicit $exec :: (store (s32) into %stack.10, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 44, 0, 0, implicit $exec :: (store (s32) into %stack.11, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr16, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 48, 0, 0, implicit $exec :: (store (s32) into %stack.12, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr17, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 52, 0, 0, implicit $exec :: (store (s32) into %stack.13, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr18, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 56, 0, 0, implicit $exec :: (store (s32) into %stack.14, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr19, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 60, 0, 0, implicit $exec :: (store (s32) into %stack.15, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr20, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 64, 0, 0, implicit $exec :: (store (s32) into %stack.16, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr21, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 68, 0, 0, implicit $exec :: (store (s32) into %stack.17, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr22, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 72, 0, 0, implicit $exec :: (store (s32) into %stack.18, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.18, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr23, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 76, 0, 0, implicit $exec :: (store (s32) into %stack.19, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.19, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr24, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 80, 0, 0, implicit $exec :: (store (s32) into %stack.20, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.20, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr25, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 84, 0, 0, implicit $exec :: (store (s32) into %stack.21, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.21, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr26, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 88, 0, 0, implicit $exec :: (store (s32) into %stack.22, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.22, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr27, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 92, 0, 0, implicit $exec :: (store (s32) into %stack.23, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.23, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr28, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 96, 0, 0, implicit $exec :: (store (s32) into %stack.24, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.24, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr29, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 100, 0, 0, implicit $exec :: (store (s32) into %stack.25, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.25, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr30, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 104, 0, 0, implicit $exec :: (store (s32) into %stack.26, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.26, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr31, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 108, 0, 0, implicit $exec :: (store (s32) into %stack.27, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.27, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr34, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 112, 0, 0, implicit $exec :: (store (s32) into %stack.28, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.28, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr35, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 116, 0, 0, implicit $exec :: (store (s32) into %stack.29, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.29, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr36, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 120, 0, 0, implicit $exec :: (store (s32) into %stack.30, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.30, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr37, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 124, 0, 0, implicit $exec :: (store (s32) into %stack.31, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.31, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr38, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 128, 0, 0, implicit $exec :: (store (s32) into %stack.32, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.32, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr39, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 132, 0, 0, implicit $exec :: (store (s32) into %stack.33, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.33, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr40, implicit $exec, implicit $exec
     ; GFX908-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19, implicit $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, implicit $vgpr35
-    ; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec
     ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $exec
     ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec
@@ -287,39 +287,39 @@ body:             |
     ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec
     ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec, implicit $exec
     ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec
-    ; GFX908-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-    ; GFX908-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
-    ; GFX908-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 12, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
-    ; GFX908-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
-    ; GFX908-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 20, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
-    ; GFX908-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 24, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
-    ; GFX908-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 28, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
-    ; GFX908-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 32, 0, 0, implicit $exec :: (load (s32) from %stack.8, addrspace 5)
-    ; GFX908-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 36, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
-    ; GFX908-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 40, 0, 0, implicit $exec :: (load (s32) from %stack.10, addrspace 5)
-    ; GFX908-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 44, 0, 0, implicit $exec :: (load (s32) from %stack.11, addrspace 5)
-    ; GFX908-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 48, 0, 0, implicit $exec :: (load (s32) from %stack.12, addrspace 5)
-    ; GFX908-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 52, 0, 0, implicit $exec :: (load (s32) from %stack.13, addrspace 5)
-    ; GFX908-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 56, 0, 0, implicit $exec :: (load (s32) from %stack.14, addrspace 5)
-    ; GFX908-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 60, 0, 0, implicit $exec :: (load (s32) from %stack.15, addrspace 5)
-    ; GFX908-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 64, 0, 0, implicit $exec :: (load (s32) from %stack.16, addrspace 5)
-    ; GFX908-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 68, 0, 0, implicit $exec :: (load (s32) from %stack.17, addrspace 5)
-    ; GFX908-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 72, 0, 0, implicit $exec :: (load (s32) from %stack.18, addrspace 5)
-    ; GFX908-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 76, 0, 0, implicit $exec :: (load (s32) from %stack.19, addrspace 5)
-    ; GFX908-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 80, 0, 0, implicit $exec :: (load (s32) from %stack.20, addrspace 5)
-    ; GFX908-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 84, 0, 0, implicit $exec :: (load (s32) from %stack.21, addrspace 5)
-    ; GFX908-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 88, 0, 0, implicit $exec :: (load (s32) from %stack.22, addrspace 5)
-    ; GFX908-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 92, 0, 0, implicit $exec :: (load (s32) from %stack.23, addrspace 5)
-    ; GFX908-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 96, 0, 0, implicit $exec :: (load (s32) from %stack.24, addrspace 5)
-    ; GFX908-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 100, 0, 0, implicit $exec :: (load (s32) from %stack.25, addrspace 5)
-    ; GFX908-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 104, 0, 0, implicit $exec :: (load (s32) from %stack.26, addrspace 5)
-    ; GFX908-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 108, 0, 0, implicit $exec :: (load (s32) from %stack.27, addrspace 5)
-    ; GFX908-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 112, 0, 0, implicit $exec :: (load (s32) from %stack.28, addrspace 5)
-    ; GFX908-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 116, 0, 0, implicit $exec :: (load (s32) from %stack.29, addrspace 5)
-    ; GFX908-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 120, 0, 0, implicit $exec :: (load (s32) from %stack.30, addrspace 5)
-    ; GFX908-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 124, 0, 0, implicit $exec :: (load (s32) from %stack.31, addrspace 5)
-    ; GFX908-NEXT: $vgpr32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 128, 0, 0, implicit $exec :: (load (s32) from %stack.32, addrspace 5)
-    ; GFX908-NEXT: $vgpr33 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 132, 0, 0, implicit $exec :: (load (s32) from %stack.33, addrspace 5)
+    ; GFX908-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GFX908-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GFX908-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; GFX908-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+    ; GFX908-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
+    ; GFX908-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
+    ; GFX908-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
+    ; GFX908-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
+    ; GFX908-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GFX908-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
+    ; GFX908-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
+    ; GFX908-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
+    ; GFX908-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
+    ; GFX908-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
+    ; GFX908-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
+    ; GFX908-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
+    ; GFX908-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
+    ; GFX908-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.18, addrspace 5)
+    ; GFX908-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.19, addrspace 5)
+    ; GFX908-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.20, addrspace 5)
+    ; GFX908-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.21, addrspace 5)
+    ; GFX908-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.22, addrspace 5)
+    ; GFX908-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.23, addrspace 5)
+    ; GFX908-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.24, addrspace 5)
+    ; GFX908-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.25, addrspace 5)
+    ; GFX908-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.26, addrspace 5)
+    ; GFX908-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.27, addrspace 5)
+    ; GFX908-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.28, addrspace 5)
+    ; GFX908-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.29, addrspace 5)
+    ; GFX908-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.30, addrspace 5)
+    ; GFX908-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.31, addrspace 5)
+    ; GFX908-NEXT: $vgpr32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.32, addrspace 5)
+    ; GFX908-NEXT: $vgpr33 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.33, addrspace 5)
     ; GFX908-NEXT: S_NOP 0, implicit renamable $agpr0, implicit killed renamable $vgpr1, implicit killed renamable $vgpr2, implicit killed renamable $vgpr3, implicit killed renamable $vgpr4, implicit killed renamable $vgpr5, implicit killed renamable $vgpr6, implicit killed renamable $vgpr7, implicit killed renamable $vgpr8, implicit killed renamable $vgpr9, implicit killed renamable $vgpr10, implicit killed renamable $vgpr11, implicit killed renamable $vgpr12, implicit killed renamable $vgpr13, implicit killed renamable $vgpr14, implicit killed renamable $vgpr15, implicit killed renamable $vgpr16, implicit killed renamable $vgpr17, implicit killed renamable $vgpr18, implicit killed renamable $vgpr19, implicit killed renamable $vgpr20, implicit killed renamable $vgpr21, implicit killed renamable $vgpr22, implicit killed renamable $vgpr23, implicit killed renamable $vgpr24, implicit killed renamable $vgpr25, implicit killed renamable $vgpr26, implicit killed renamable $vgpr27, implicit killed renamable $vgpr28, implicit killed renamable $vgpr29, implicit killed renamable $vgpr30, implicit killed renamable $vgpr31, implicit killed renamable $vgpr32, implicit killed renamable $vgpr33, implicit killed renamable $vgpr34
     ; GFX908-NEXT: S_ENDPGM 0, implicit killed renamable $agpr0
     %v0:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir
index 592e0f0cf0c24..38db31cf937ef 100644
--- a/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir
@@ -20,11 +20,11 @@ body: |
     ; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF
     ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
     ; CHECK-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
     ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr0, implicit $sgpr4_sgpr5
     ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5
     ; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr0
     ; CHECK-NEXT: S_ENDPGM 0
     $vgpr0 = IMPLICIT_DEF
@@ -53,11 +53,11 @@ body: |
     ; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF
     ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
     ; CHECK-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
     ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr0, implicit $sgpr4_sgpr5
     ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr0
     ; CHECK-NEXT: S_ENDPGM 0
     $vgpr0 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
index b4f23ec00b8e2..cbc00b7c5b67b 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
@@ -33,11 +33,11 @@ body:             |
   ; GCN-NEXT:   $vcc_hi = frame-setup COPY $sgpr33
   ; GCN-NEXT:   $sgpr33 = frame-setup COPY $sgpr32
   ; GCN-NEXT:   $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.69, addrspace 5)
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.70, addrspace 5)
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.71, addrspace 5)
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.72, addrspace 5)
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.73, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
   ; GCN-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr0
   ; GCN-NEXT:   $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24, implicit-def dead $scc
   ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr2
@@ -200,11 +200,11 @@ body:             |
   ; GCN-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
   ; GCN-NEXT:   $sgpr32 = frame-destroy COPY $sgpr33
   ; GCN-NEXT:   $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; GCN-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.69, addrspace 5)
-  ; GCN-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.70, addrspace 5)
-  ; GCN-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.71, addrspace 5)
-  ; GCN-NEXT:   $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.72, addrspace 5)
-  ; GCN-NEXT:   $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.73, addrspace 5)
+  ; GCN-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
+  ; GCN-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
+  ; GCN-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
+  ; GCN-NEXT:   $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
+  ; GCN-NEXT:   $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
   ; GCN-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr0
   ; GCN-NEXT:   $sgpr33 = frame-destroy COPY $vcc_hi
   ; GCN-NEXT:   S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
index 59c4b715dd12e..3e13bdfadea98 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
@@ -26,11 +26,11 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1, implicit-def $vgpr0
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr8, 0, undef $vgpr0
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr0
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -76,12 +76,12 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 3, implicit-def $vgpr0
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr8, 0, undef $vgpr0, implicit $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr9, 1, $vgpr0, implicit $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr0
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -125,11 +125,11 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1, implicit-def $vgpr0
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr0
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -173,12 +173,12 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 3, implicit-def $vgpr0
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr0
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -223,14 +223,14 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr8, 0, undef $vgpr0
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -276,14 +276,14 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -332,15 +332,15 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 3
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr8, 0, undef $vgpr0, implicit $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr9, 1, $vgpr0, implicit $sgpr8_sgpr9
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -386,15 +386,15 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 3
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -446,24 +446,24 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr8, 0, undef $vgpr0
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr9, 0, undef $vgpr0
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -510,24 +510,24 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 16392, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
index cac9c85130a7b..370ca8073567b 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
@@ -28,17 +28,17 @@ body:             |
     ; CHECK-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
     ; CHECK-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
     ; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
     ; CHECK-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr10, 0, undef $vgpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
     ; CHECK-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
     ; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; CHECK-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR killed $vgpr1, 0
-    ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
     ; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $scc
     S_CMP_EQ_U32 0, 0, implicit-def $scc
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
index ba2e80fdc04c8..fd5044031f3ef 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
@@ -68,74 +68,74 @@ body:             |
     ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 0, undef $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 28, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
@@ -144,13 +144,13 @@ body:             |
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
@@ -167,13 +167,13 @@ body:             |
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 96, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
@@ -206,17 +206,17 @@ body:             |
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 262144, implicit-def dead $scc
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, align 4096, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, align 4096, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ;
     ; GCN32-MUBUF-LABEL: name: check_spill
@@ -232,74 +232,74 @@ body:             |
     ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 0, undef $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 15, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, implicit $exec :: (store (s32) into %stack.3, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (store (s32) into %stack.4, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
@@ -308,13 +308,13 @@ body:             |
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (store (s32) into %stack.5, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
@@ -331,13 +331,13 @@ body:             |
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, implicit $exec :: (store (s32) into %stack.6, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
@@ -370,17 +370,17 @@ body:             |
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (store (s32) into %stack.7, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr1 = S_ADD_I32 $sgpr33, 131072, implicit-def dead $scc
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, implicit $exec :: (store (s32) into %stack.8, align 4096, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, align 4096, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ;
     ; GCN64-FLATSCR-LABEL: name: check_spill
@@ -392,74 +392,74 @@ body:             |
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 0, undef $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
@@ -468,13 +468,13 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
@@ -491,13 +491,13 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
@@ -530,17 +530,17 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, killed $sgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.8, align 4096, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, killed $sgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.8, align 4096, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     renamable $sgpr12 = IMPLICIT_DEF
     SI_SPILL_S32_SAVE killed $sgpr12, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
@@ -626,53 +626,53 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr29 = S_ADDC_U32 $sgpr29, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13
     ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
     ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 2
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 28, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
     ; GCN64-MUBUF-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 3
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
     ; GCN64-MUBUF-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
     ; GCN64-MUBUF-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 4
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -681,12 +681,12 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 5
     ; GCN64-MUBUF-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 6
     ; GCN64-MUBUF-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 7
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 96, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -703,12 +703,12 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 13
     ; GCN64-MUBUF-NEXT: $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 14
     ; GCN64-MUBUF-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 15
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-MUBUF-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-MUBUF-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -741,15 +741,15 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 29
     ; GCN64-MUBUF-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 30
     ; GCN64-MUBUF-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 31
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 262144, implicit-def dead $scc
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, align 4096, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, align 4096, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ;
     ; GCN32-MUBUF-LABEL: name: check_reload
@@ -764,53 +764,53 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13
     ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
     ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN32-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 2
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 15, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, implicit $exec :: (load (s32) from %stack.3, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN32-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
     ; GCN32-MUBUF-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 3
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: (load (s32) from %stack.4, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN32-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
     ; GCN32-MUBUF-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
     ; GCN32-MUBUF-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 4
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: (load (s32) from %stack.5, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN32-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -819,12 +819,12 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 5
     ; GCN32-MUBUF-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 6
     ; GCN32-MUBUF-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 7
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, implicit $exec :: (load (s32) from %stack.6, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN32-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -841,12 +841,12 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 13
     ; GCN32-MUBUF-NEXT: $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 14
     ; GCN32-MUBUF-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 15
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: (load (s32) from %stack.7, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN32-MUBUF-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN32-MUBUF-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -879,15 +879,15 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 29
     ; GCN32-MUBUF-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 30
     ; GCN32-MUBUF-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 31
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr1 = S_ADD_I32 $sgpr33, 131072, implicit-def dead $scc
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, implicit $exec :: (load (s32) from %stack.8, align 4096, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, align 4096, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ;
     ; GCN64-FLATSCR-LABEL: name: check_reload
@@ -898,53 +898,53 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13
     ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
     ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-FLATSCR-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 2
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-FLATSCR-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
     ; GCN64-FLATSCR-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 3
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-FLATSCR-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
     ; GCN64-FLATSCR-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
     ; GCN64-FLATSCR-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 4
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.5, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-FLATSCR-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -953,12 +953,12 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 5
     ; GCN64-FLATSCR-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 6
     ; GCN64-FLATSCR-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 7
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.6, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-FLATSCR-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -975,12 +975,12 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 13
     ; GCN64-FLATSCR-NEXT: $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 14
     ; GCN64-FLATSCR-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 15
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.7, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-FLATSCR-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-FLATSCR-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -1013,15 +1013,15 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 29
     ; GCN64-FLATSCR-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 30
     ; GCN64-FLATSCR-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 31
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.8, align 4096, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.8, align 4096, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     renamable $sgpr12 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
 
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
index 9d25df4738709..82a6be138a71c 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
@@ -18,9 +18,9 @@ body:             |
     ; CHECK: liveins: $agpr0_agpr1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
 ...
 
@@ -43,9 +43,9 @@ body:             |
     ; CHECK: liveins: $agpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
 ...
 
@@ -66,9 +66,9 @@ body:             |
     ; CHECK: liveins: $agpr1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: (store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
 ...
 
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
index 3f6956b83ae92..8f564cbd2a2c1 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
@@ -255,14 +255,14 @@ body: |
   ; GFX908-EXPANDED-NEXT: {{  $}}
   ; GFX908-EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $agpr0
   ; GFX908-EXPANDED-NEXT:   $vgpr63 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
-  ; GFX908-EXPANDED-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+  ; GFX908-EXPANDED-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
   ; GFX908-EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; GFX908-EXPANDED-NEXT: {{  $}}
   ; GFX908-EXPANDED-NEXT: bb.1:
   ; GFX908-EXPANDED-NEXT:   successors: %bb.2(0x80000000)
   ; GFX908-EXPANDED-NEXT: {{  $}}
   ; GFX908-EXPANDED-NEXT: bb.2:
-  ; GFX908-EXPANDED-NEXT:   $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+  ; GFX908-EXPANDED-NEXT:   $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
   ; GFX908-EXPANDED-NEXT:   $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
   ; GFX908-EXPANDED-NEXT:   S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
   ; GFX908-EXPANDED-NEXT:   S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
@@ -320,14 +320,14 @@ body: |
   ; GFX90A-EXPANDED-NEXT:   liveins: $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-EXPANDED-NEXT: {{  $}}
   ; GFX90A-EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $agpr0
-  ; GFX90A-EXPANDED-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+  ; GFX90A-EXPANDED-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
   ; GFX90A-EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; GFX90A-EXPANDED-NEXT: {{  $}}
   ; GFX90A-EXPANDED-NEXT: bb.1:
   ; GFX90A-EXPANDED-NEXT:   successors: %bb.2(0x80000000)
   ; GFX90A-EXPANDED-NEXT: {{  $}}
   ; GFX90A-EXPANDED-NEXT: bb.2:
-  ; GFX90A-EXPANDED-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+  ; GFX90A-EXPANDED-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
   ; GFX90A-EXPANDED-NEXT:   S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
   ; GFX90A-EXPANDED-NEXT:   S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
   ; GFX90A-EXPANDED-NEXT:   S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
index f4edafd9443ab..1b6a3be0d257d 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
@@ -23,7 +23,7 @@ body:             |
     ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr8_sgpr9 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9
     ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2
     ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
@@ -33,7 +33,7 @@ body:             |
     ; GCN-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
     ; GCN-NEXT: renamable $sgpr8 = COPY renamable $sgpr1
     ; GCN-NEXT: $sgpr0_sgpr1 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
     ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr8
     renamable $sgpr1 = COPY $sgpr2
@@ -61,7 +61,7 @@ body:             |
     ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr8_sgpr9 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9
     ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2
     ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
@@ -70,7 +70,7 @@ body:             |
     ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr2, 2, killed $vgpr0
     ; GCN-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
     ; GCN-NEXT: $sgpr0_sgpr1 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
     ; GCN-NEXT: S_ENDPGM 0
     renamable $sgpr1 = COPY $sgpr2
@@ -96,10 +96,10 @@ body:             |
     ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr1 = COPY $vgpr2, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; GCN-NEXT: renamable $vgpr8 = COPY $vgpr2, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr8
     renamable $vgpr1 = COPY $vgpr2
@@ -126,10 +126,10 @@ body:             |
     ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr1 = COPY $vgpr2, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0
     renamable $vgpr1 = COPY $vgpr2
     SI_SPILL_V128_SAVE renamable killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/spill-restore-partial-copy.mir b/llvm/test/CodeGen/AMDGPU/spill-restore-partial-copy.mir
index bb87b6e52da89..efeaf97365d25 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-restore-partial-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-restore-partial-copy.mir
@@ -162,7 +162,7 @@ body:             |
     ; GFX950-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit killed $vgpr16_vgpr17_vgpr18_vgpr19
     ; GFX950-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr20_vgpr21_vgpr22_vgpr23
     ; GFX950-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec, implicit $vgpr20_vgpr21_vgpr22_vgpr23
-    ; GFX950-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr20_vgpr21, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr20_vgpr21_vgpr22_vgpr23 :: (store (s64) into %stack.5, align 4, addrspace 5)
+    ; GFX950-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr20_vgpr21, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr20_vgpr21_vgpr22_vgpr23 :: ("amdgpu-non-volatile" store (s64) into %stack.5, align 4, addrspace 5)
     ; GFX950-NEXT: $vgpr0 = IMPLICIT_DEF
     ; GFX950-NEXT: $agpr5 = COPY $agpr6, implicit-def $agpr2_agpr3_agpr4_agpr5
     ; GFX950-NEXT: $agpr4 = COPY $agpr7, implicit $agpr2_agpr3_agpr4_agpr5
@@ -191,7 +191,7 @@ body:             |
     ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 4096, 0, implicit $exec
     ; GFX950-NEXT: $agpr5 = COPY $agpr26, implicit-def $agpr2_agpr3_agpr4_agpr5
     ; GFX950-NEXT: $agpr4 = COPY $agpr27, implicit $agpr2_agpr3_agpr4_agpr5
-    ; GFX950-NEXT: $agpr2_agpr3 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr2_agpr3_agpr4_agpr5, implicit $agpr2_agpr3_agpr4_agpr5 :: (load (s64) from %stack.5, align 4, addrspace 5)
+    ; GFX950-NEXT: $agpr2_agpr3 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr2_agpr3_agpr4_agpr5, implicit $agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" load (s64) from %stack.5, align 4, addrspace 5)
     ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 5120, 0, implicit $exec
     ; GFX950-NEXT: S_ENDPGM 0
     renamable $agpr0_agpr1 = IMPLICIT_DEF
@@ -266,7 +266,7 @@ body:             |
     ; GFX950-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec, implicit $vgpr16_vgpr17_vgpr18_vgpr19
     ; GFX950-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec, implicit $vgpr16_vgpr17_vgpr18_vgpr19
     ; GFX950-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit killed $vgpr16_vgpr17_vgpr18_vgpr19
-    ; GFX950-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s128) into %stack.5, align 4, addrspace 5)
+    ; GFX950-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.5, align 4, addrspace 5)
     ; GFX950-NEXT: $vgpr0 = IMPLICIT_DEF
     ; GFX950-NEXT: $agpr5 = COPY $agpr6, implicit-def $agpr2_agpr3_agpr4_agpr5
     ; GFX950-NEXT: $agpr4 = COPY $agpr7, implicit $agpr2_agpr3_agpr4_agpr5
@@ -293,7 +293,7 @@ body:             |
     ; GFX950-NEXT: $agpr3 = COPY $agpr24, implicit $agpr2_agpr3_agpr4_agpr5
     ; GFX950-NEXT: $agpr2 = COPY $agpr25, implicit $agpr2_agpr3_agpr4_agpr5
     ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 4096, 0, implicit $exec
-    ; GFX950-NEXT: $agpr2_agpr3_agpr4_agpr5 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s128) from %stack.5, align 4, addrspace 5)
+    ; GFX950-NEXT: $agpr2_agpr3_agpr4_agpr5 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.5, align 4, addrspace 5)
     ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 5120, 0, implicit $exec
     ; GFX950-NEXT: S_ENDPGM 0
     renamable $agpr0_agpr1 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
index 639bf6a6d550c..8c6f85cf4a388 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
@@ -50,28 +50,28 @@ body:             |
     ; GFX9-NEXT: $vcc = IMPLICIT_DEF
     ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
     ; GFX9-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
     ; GFX9-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit $vcc
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GFX9-NEXT: $vcc = IMPLICIT_DEF
     ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
     ; GFX9-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
     ; GFX9-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit killed $vcc
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; GFX9-NEXT: $vcc_lo = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $vcc
     ; GFX9-NEXT: $vcc_hi = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
-    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ;
     ; GFX10-LABEL: name: check_vcc
@@ -87,28 +87,28 @@ body:             |
     ; GFX10-NEXT: $vcc = IMPLICIT_DEF
     ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
     ; GFX10-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
     ; GFX10-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit $vcc
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GFX10-NEXT: $vcc = IMPLICIT_DEF
     ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
     ; GFX10-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
     ; GFX10-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit killed $vcc
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; GFX10-NEXT: $vcc_lo = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $vcc
     ; GFX10-NEXT: $vcc_hi = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
-    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ;
     ; GFX11-LABEL: name: check_vcc
@@ -118,28 +118,28 @@ body:             |
     ; GFX11-NEXT: $vcc = IMPLICIT_DEF
     ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
     ; GFX11-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
     ; GFX11-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit $vcc
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GFX11-NEXT: $vcc = IMPLICIT_DEF
     ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
     ; GFX11-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
     ; GFX11-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit killed $vcc
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; GFX11-NEXT: $vcc_lo = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $vcc
     ; GFX11-NEXT: $vcc_hi = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
-    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
+    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     $vcc = IMPLICIT_DEF
     SI_SPILL_S64_SAVE $vcc, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
diff --git a/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir b/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir
index beeb9b2df8b01..c93732a052b4e 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir
@@ -17,9 +17,9 @@ body:             |
     ; GCN: liveins: $agpr30, $agpr31, $agpr28_agpr29, $agpr24_agpr25_agpr26_agpr27, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s96) into %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s96) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28_agpr29, implicit $agpr30
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -43,10 +43,10 @@ body:             |
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s64) into %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s64) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28_agpr29
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -71,11 +71,11 @@ body:             |
     ; GCN-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -127,9 +127,9 @@ body:             |
     ; GCN: liveins: $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr52_vgpr53, $vgpr48_vgpr49_vgpr50_vgpr51, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s96) into %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (load (s96) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52_vgpr53, implicit $vgpr54
     SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -153,10 +153,10 @@ body:             |
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s64) into %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (load (s64) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52_vgpr53
     SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -181,11 +181,11 @@ body:             |
     ; GCN-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52
     SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir b/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir
index 69895833efccb..a74b44ac9a31c 100644
--- a/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir
@@ -18,9 +18,9 @@ body: |
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
-  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.1, align 4, addrspace 5)
   ; EXPANDED-NEXT:   S_NOP 0, implicit renamable $vgpr0_lo16
-  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.0, align 4, addrspace 5)
   ; EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.1:
@@ -29,8 +29,8 @@ body: |
   ; EXPANDED-NEXT:   S_NOP 1
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.2:
-  ; EXPANDED-NEXT:   $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, align 4, addrspace 5)
-  ; EXPANDED-NEXT:   $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.0, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.1, align 4, addrspace 5)
   ; EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
   ;
   ; SRAMECC-EXPANDED-LABEL: name: spill_restore_vgpr16
@@ -38,9 +38,9 @@ body: |
   ; SRAMECC-EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
-  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.1, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit renamable $vgpr0_lo16
-  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.0, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT: bb.1:
@@ -49,9 +49,9 @@ body: |
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 1
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT: bb.2:
-  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.0, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   $vgpr0_lo16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
-  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.1, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   $vgpr0_hi16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
    bb.0:
@@ -86,9 +86,9 @@ body: |
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
-  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.1, align 4, addrspace 5)
   ; EXPANDED-NEXT:   S_NOP 0, implicit renamable $vgpr0_lo16
-  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.0, align 4, addrspace 5)
   ; EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.1:
@@ -98,8 +98,8 @@ body: |
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.2:
   ; EXPANDED-NEXT:   S_NOP 1
-  ; EXPANDED-NEXT:   $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, align 4, addrspace 5)
-  ; EXPANDED-NEXT:   $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.0, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.1, align 4, addrspace 5)
   ; EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
   ;
   ; SRAMECC-EXPANDED-LABEL: name: spill_restore_vgpr16_middle_of_block
@@ -107,9 +107,9 @@ body: |
   ; SRAMECC-EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
-  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.1, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit renamable $vgpr0_lo16
-  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.0, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT: bb.1:
@@ -119,9 +119,9 @@ body: |
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT: bb.2:
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 1
-  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.0, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   $vgpr0_lo16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
-  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.1, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   $vgpr0_hi16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
    bb.0:
@@ -157,9 +157,9 @@ body: |
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
-  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.1, align 4, addrspace 5)
   ; EXPANDED-NEXT:   S_NOP 0, implicit renamable $vgpr0_lo16
-  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.0, align 4, addrspace 5)
   ; EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.1:
@@ -168,17 +168,17 @@ body: |
   ; EXPANDED-NEXT:   S_NOP 1
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.2:
-  ; EXPANDED-NEXT:   $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, align 4, addrspace 5)
-  ; EXPANDED-NEXT:   $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.0, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.1, align 4, addrspace 5)
   ;
   ; SRAMECC-EXPANDED-LABEL: name: spill_restore_vgpr16_end_of_block
   ; SRAMECC-EXPANDED: bb.0:
   ; SRAMECC-EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
-  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.1, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit renamable $vgpr0_lo16
-  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.0, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT: bb.1:
@@ -187,9 +187,9 @@ body: |
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 1
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT: bb.2:
-  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.0, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   $vgpr0_lo16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
-  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.1, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   $vgpr0_hi16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
    bb.0:
      successors: %bb.1(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/spillv16.ll b/llvm/test/CodeGen/AMDGPU/spillv16.ll
index c16793675f6a2..62ea4dc7a1865 100644
--- a/llvm/test/CodeGen/AMDGPU/spillv16.ll
+++ b/llvm/test/CodeGen/AMDGPU/spillv16.ll
@@ -64,11 +64,11 @@ define void @spill_i16_alu() {
 ; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
 ; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v0.l, 0x7b, v0.l
-; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 offset:2 ; 2-byte Folded Spill
+; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 offset:2 nv ; 2-byte Folded Spill
 ; GFX1250-TRUE16-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-TRUE16-NEXT:    ;;#ASMSTART
 ; GFX1250-TRUE16-NEXT:    ;;#ASMEND
-; GFX1250-TRUE16-NEXT:    scratch_load_u16 v1, off, s32 offset:2 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX1250-TRUE16-NEXT:    scratch_load_u16 v1, off, s32 offset:2 th:TH_LOAD_LU nv ; 2-byte Folded Reload
 ; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v1.l
 ; GFX1250-TRUE16-NEXT:    s_wait_xcnt 0x0
@@ -83,11 +83,11 @@ define void @spill_i16_alu() {
 ; GFX1250-FAKE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
 ; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-FAKE16-NEXT:    v_add_nc_u16 v0, 0x7b, v0
-; GFX1250-FAKE16-NEXT:    scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
+; GFX1250-FAKE16-NEXT:    scratch_store_b32 off, v0, s32 offset:4 nv ; 4-byte Folded Spill
 ; GFX1250-FAKE16-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-FAKE16-NEXT:    ;;#ASMSTART
 ; GFX1250-FAKE16-NEXT:    ;;#ASMEND
-; GFX1250-FAKE16-NEXT:    scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-FAKE16-NEXT:    scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU nv ; 4-byte Folded Reload
 ; GFX1250-FAKE16-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-FAKE16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
@@ -207,13 +207,13 @@ define void @spill_i16_alu_two_vals() {
 ; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
 ; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v0.l, 0x7b, v0.l
-; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 offset:6 ; 2-byte Folded Spill
+; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 offset:6 nv ; 2-byte Folded Spill
 ; GFX1250-TRUE16-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-TRUE16-NEXT:    ;;#ASMSTART
 ; GFX1250-TRUE16-NEXT:    ;;#ASMEND
 ; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 offset:4 scope:SCOPE_SYS
 ; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_load_u16 v1, off, s32 offset:6 th:TH_LOAD_LU ; 2-byte Folded Reload
+; GFX1250-TRUE16-NEXT:    scratch_load_u16 v1, off, s32 offset:6 th:TH_LOAD_LU nv ; 2-byte Folded Reload
 ; GFX1250-TRUE16-NEXT:    v_add_nc_u16 v0.l, 0x7b, v0.l
 ; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-TRUE16-NEXT:    v_mov_b16_e32 v0.h, v1.l
@@ -232,13 +232,13 @@ define void @spill_i16_alu_two_vals() {
 ; GFX1250-FAKE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
 ; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-FAKE16-NEXT:    v_add_nc_u16 v0, 0x7b, v0
-; GFX1250-FAKE16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-FAKE16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 nv ; 4-byte Folded Spill
 ; GFX1250-FAKE16-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-FAKE16-NEXT:    ;;#ASMSTART
 ; GFX1250-FAKE16-NEXT:    ;;#ASMEND
 ; GFX1250-FAKE16-NEXT:    scratch_load_u16 v0, off, s32 offset:4 scope:SCOPE_SYS
 ; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-FAKE16-NEXT:    scratch_load_b32 v1, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-FAKE16-NEXT:    scratch_load_b32 v1, off, s32 offset:8 th:TH_LOAD_LU nv ; 4-byte Folded Reload
 ; GFX1250-FAKE16-NEXT:    v_add_nc_u16 v0, 0x7b, v0
 ; GFX1250-FAKE16-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
@@ -326,11 +326,11 @@ define void @spill_i16() {
 ; GFX1250-NEXT:    s_wait_kmcnt 0x0
 ; GFX1250-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
 ; GFX1250-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-NEXT:    scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v0, s32 offset:4 nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    ;;#ASMSTART
 ; GFX1250-NEXT:    ;;#ASMEND
-; GFX1250-NEXT:    scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
@@ -405,11 +405,11 @@ define void @spill_half() {
 ; GFX1250-NEXT:    s_wait_kmcnt 0x0
 ; GFX1250-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
 ; GFX1250-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-NEXT:    scratch_store_b32 off, v0, s32 offset:4 ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v0, s32 offset:4 nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    ;;#ASMSTART
 ; GFX1250-NEXT:    ;;#ASMEND
-; GFX1250-NEXT:    scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v0, off, s32 offset:4 th:TH_LOAD_LU nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
@@ -484,11 +484,11 @@ define void @spill_i16_from_v2i16() {
 ; GFX1250-NEXT:    s_wait_kmcnt 0x0
 ; GFX1250-NEXT:    scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
 ; GFX1250-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v0, s32 offset:8 nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    ;;#ASMSTART
 ; GFX1250-NEXT:    ;;#ASMEND
-; GFX1250-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-NEXT:    scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
@@ -585,19 +585,19 @@ define void @spill_2xi16_from_v2i16() {
 ; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
 ; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-TRUE16-NEXT:    s_clause 0x1 ; 4-byte Folded Spill
-; GFX1250-TRUE16-NEXT:    scratch_store_b32 off, v0, s32 offset:12
+; GFX1250-TRUE16-NEXT:    scratch_store_b32 off, v0, s32 offset:12 nv
 ; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
 ; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 nv ; 4-byte Folded Spill
 ; GFX1250-TRUE16-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-TRUE16-NEXT:    ;;#ASMSTART
 ; GFX1250-TRUE16-NEXT:    ;;#ASMEND
-; GFX1250-TRUE16-NEXT:    scratch_load_b32 v0, off, s32 offset:12 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-NEXT:    scratch_load_b32 v0, off, s32 offset:12 th:TH_LOAD_LU nv ; 4-byte Folded Reload
 ; GFX1250-TRUE16-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
 ; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU nv ; 4-byte Folded Reload
 ; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
 ; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
@@ -610,19 +610,19 @@ define void @spill_2xi16_from_v2i16() {
 ; GFX1250-FAKE16-NEXT:    scratch_load_u16 v0, off, s32 offset:2 scope:SCOPE_SYS
 ; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-FAKE16-NEXT:    s_clause 0x1 ; 4-byte Folded Spill
-; GFX1250-FAKE16-NEXT:    scratch_store_b32 off, v0, s32 offset:8
+; GFX1250-FAKE16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 nv
 ; GFX1250-FAKE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
 ; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-FAKE16-NEXT:    scratch_store_b32 off, v0, s32 offset:12 ; 4-byte Folded Spill
+; GFX1250-FAKE16-NEXT:    scratch_store_b32 off, v0, s32 offset:12 nv ; 4-byte Folded Spill
 ; GFX1250-FAKE16-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-FAKE16-NEXT:    ;;#ASMSTART
 ; GFX1250-FAKE16-NEXT:    ;;#ASMEND
-; GFX1250-FAKE16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-FAKE16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU nv ; 4-byte Folded Reload
 ; GFX1250-FAKE16-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-FAKE16-NEXT:    scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
 ; GFX1250-FAKE16-NEXT:    s_wait_storecnt 0x0
-; GFX1250-FAKE16-NEXT:    scratch_load_b32 v0, off, s32 offset:12 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-FAKE16-NEXT:    scratch_load_b32 v0, off, s32 offset:12 th:TH_LOAD_LU nv ; 4-byte Folded Reload
 ; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-FAKE16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
 ; GFX1250-FAKE16-NEXT:    s_wait_storecnt 0x0
@@ -721,14 +721,14 @@ define void @spill_2xi16_from_v2i16_one_free_reg() {
 ; GFX1250-TRUE16-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-TRUE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
 ; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-TRUE16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 nv ; 4-byte Folded Spill
 ; GFX1250-TRUE16-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-TRUE16-NEXT:    ;;#ASMSTART
 ; GFX1250-TRUE16-NEXT:    ;;#ASMEND
 ; GFX1250-TRUE16-NEXT:    v_mov_b16_e32 v0.l, v7.l
 ; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 offset:2 scope:SCOPE_SYS
 ; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
-; GFX1250-TRUE16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-TRUE16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU nv ; 4-byte Folded Reload
 ; GFX1250-TRUE16-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-TRUE16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
 ; GFX1250-TRUE16-NEXT:    s_wait_storecnt 0x0
@@ -743,13 +743,13 @@ define void @spill_2xi16_from_v2i16_one_free_reg() {
 ; GFX1250-FAKE16-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-FAKE16-NEXT:    scratch_load_u16 v0, off, s32 scope:SCOPE_SYS
 ; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-FAKE16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-FAKE16-NEXT:    scratch_store_b32 off, v0, s32 offset:8 nv ; 4-byte Folded Spill
 ; GFX1250-FAKE16-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-FAKE16-NEXT:    ;;#ASMSTART
 ; GFX1250-FAKE16-NEXT:    ;;#ASMEND
 ; GFX1250-FAKE16-NEXT:    scratch_store_b16 off, v7, s32 offset:2 scope:SCOPE_SYS
 ; GFX1250-FAKE16-NEXT:    s_wait_storecnt 0x0
-; GFX1250-FAKE16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-FAKE16-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU nv ; 4-byte Folded Reload
 ; GFX1250-FAKE16-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-FAKE16-NEXT:    scratch_store_b16 off, v0, s32 scope:SCOPE_SYS
 ; GFX1250-FAKE16-NEXT:    s_wait_storecnt 0x0
@@ -814,11 +814,11 @@ define void @spill_v2i16() {
 ; GFX1250-NEXT:    s_wait_kmcnt 0x0
 ; GFX1250-NEXT:    scratch_load_b32 v0, off, s32 offset:4 scope:SCOPE_SYS
 ; GFX1250-NEXT:    s_wait_loadcnt 0x0
-; GFX1250-NEXT:    scratch_store_b32 off, v0, s32 offset:8 ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v0, s32 offset:8 nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    ;;#ASMSTART
 ; GFX1250-NEXT:    ;;#ASMEND
-; GFX1250-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v0, off, s32 offset:8 th:TH_LOAD_LU nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-NEXT:    scratch_store_b32 off, v0, s32 offset:4 scope:SCOPE_SYS
diff --git a/llvm/test/CodeGen/AMDGPU/spillv16.mir b/llvm/test/CodeGen/AMDGPU/spillv16.mir
index ba2d926eb8883..0c20d7a0bf12d 100644
--- a/llvm/test/CodeGen/AMDGPU/spillv16.mir
+++ b/llvm/test/CodeGen/AMDGPU/spillv16.mir
@@ -34,8 +34,8 @@ body: |
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
-  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, addrspace 5)
-  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, addrspace 5)
+  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.1, addrspace 5)
+  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.0, addrspace 5)
   ; EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.1:
@@ -44,8 +44,8 @@ body: |
   ; EXPANDED-NEXT:   S_NOP 1
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.2:
-  ; EXPANDED-NEXT:   $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, addrspace 5)
-  ; EXPANDED-NEXT:   $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, addrspace 5)
+  ; EXPANDED-NEXT:   $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.0, addrspace 5)
+  ; EXPANDED-NEXT:   $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.1, addrspace 5)
   ; EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
   ;
   ; SRAMECC-EXPANDED-LABEL: name: spill_restore_vgpr16
@@ -53,8 +53,8 @@ body: |
   ; SRAMECC-EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
-  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.1, addrspace 5)
-  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s16) into %stack.0, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.1, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.0, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT: bb.1:
@@ -63,9 +63,9 @@ body: |
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 1
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT: bb.2:
-  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.0, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.0, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   $vgpr0_lo16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
-  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: (load (s16) from %stack.1, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.1, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   $vgpr0_hi16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
   bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
index cc261b0da4a8f..98918b987026d 100644
--- a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
+++ b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
@@ -20,13 +20,13 @@ body:             |
     ; GCN: liveins: $sgpr20, $vgpr1
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
     ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
     ; GCN-NEXT: $vgpr0 = COPY killed renamable $vgpr1, implicit $exec
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: SI_RETURN implicit $vgpr0
     $vgpr0 = IMPLICIT_DEF
@@ -53,8 +53,8 @@ body:             |
     ; GCN: liveins: $sgpr20, $sgpr21, $vgpr1
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
     ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
@@ -62,8 +62,8 @@ body:             |
     ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr21, 0, $vgpr2
     ; GCN-NEXT: $vgpr0 = COPY $vgpr1, implicit $exec
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
     $vgpr0 = IMPLICIT_DEF
@@ -92,13 +92,13 @@ body:             |
     ; GCN: liveins: $sgpr20, $vgpr1
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
     ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2
     ; GCN-NEXT: $vgpr0 = COPY $vgpr1, implicit $exec
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
     $vgpr2 = IMPLICIT_DEF
@@ -124,14 +124,14 @@ body:             |
     ; GCN: liveins: $sgpr20, $vgpr1
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
     ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2
     ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0, implicit $exec
     ; GCN-NEXT: $vgpr0 = COPY killed $vgpr1, implicit $exec
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: SI_RETURN implicit $vgpr0
     $vgpr40 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir
index 69cf924548ed8..66d9b4f2c7114 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir
+++ b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir
@@ -20,9 +20,9 @@ body:             |
     ; GCN: liveins: $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s96) into %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $vgpr51 = COPY $vgpr55, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51
-    ; GCN-NEXT: $vgpr48_vgpr49_vgpr50 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s96) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $vgpr48_vgpr49_vgpr50 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr54, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -46,10 +46,10 @@ body:             |
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s64) into %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $vgpr51 = COPY $vgpr54, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51
     ; GCN-NEXT: $vgpr50 = COPY $vgpr55, implicit $vgpr48_vgpr49_vgpr50_vgpr51
-    ; GCN-NEXT: $vgpr48_vgpr49 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s64) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $vgpr48_vgpr49 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -74,11 +74,11 @@ body:             |
     ; GCN-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $vgpr51 = COPY $vgpr53, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51
     ; GCN-NEXT: $vgpr50 = COPY $vgpr54, implicit $vgpr48_vgpr49_vgpr50_vgpr51
     ; GCN-NEXT: $vgpr49 = COPY $vgpr55, implicit $vgpr48_vgpr49_vgpr50_vgpr51
-    ; GCN-NEXT: $vgpr48 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr48 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -130,9 +130,9 @@ body:             |
     ; GCN: liveins: $agpr30, $agpr31, $agpr24_agpr25, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s96) into %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $agpr29 = COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29
-    ; GCN-NEXT: $agpr26_agpr27_agpr28 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: (load (s96) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $agpr26_agpr27_agpr28 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -156,10 +156,10 @@ body:             |
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s64) into %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $agpr29 = COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29
     ; GCN-NEXT: $agpr28 = COPY $agpr31, implicit $agpr26_agpr27_agpr28_agpr29
-    ; GCN-NEXT: $agpr26_agpr27 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: (load (s64) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $agpr26_agpr27 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -184,11 +184,11 @@ body:             |
     ; GCN-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $agpr29 = COPY $agpr25, implicit-def $agpr26_agpr27_agpr28_agpr29
     ; GCN-NEXT: $agpr28 = COPY $agpr30, implicit $agpr26_agpr27_agpr28_agpr29
     ; GCN-NEXT: $agpr27 = COPY $agpr31, implicit $agpr26_agpr27_agpr28_agpr29
-    ; GCN-NEXT: $agpr26 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: (load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $agpr26 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
index 572a875941b22..8b8d0a11941f7 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
@@ -28,7 +28,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; MUBUF-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -47,7 +47,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -65,7 +65,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -83,7 +83,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -127,8 +127,8 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; MUBUF-NEXT:   $vgpr2 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.1 + 4, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -147,7 +147,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_ADD_U32_e32 8200, $vgpr2, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.1, align 4, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.1, align 4, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -165,7 +165,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   $vgpr2 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.1, align 4, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.1, align 4, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -183,8 +183,8 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $vgpr2 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.1 + 4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -226,7 +226,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; MUBUF-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -244,7 +244,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -261,7 +261,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -278,7 +278,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -320,8 +320,8 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; MUBUF-NEXT:   $vgpr2 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -339,7 +339,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_ADD_U32_e32 8200, $vgpr2, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.1, align 4, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.1, align 4, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -356,7 +356,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   $vgpr2 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.1, align 4, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.1, align 4, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -373,8 +373,8 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $vgpr2 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -416,10 +416,10 @@ body:             |
   ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; MUBUF-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -437,11 +437,11 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
-  ; GFX9-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -459,10 +459,10 @@ body:             |
   ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
-  ; GFX10-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -480,10 +480,10 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -528,11 +528,11 @@ body:             |
   ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; MUBUF-NEXT:   $vgpr2 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -550,11 +550,11 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_ADD_U32_e32 8200, $vgpr2, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.1, align 4, addrspace 5)
-  ; GFX9-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.1, align 4, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -572,10 +572,10 @@ body:             |
   ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   $vgpr2 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (load (s64) from %stack.1, align 4, addrspace 5)
-  ; GFX10-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.1, align 4, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -593,11 +593,11 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr2 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: (load (s32) from %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -642,12 +642,12 @@ body:             |
   ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; MUBUF-NEXT:   $vgpr3 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 8, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 8, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -665,11 +665,11 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   $vgpr3 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr3 = V_ADD_U32_e32 8200, $vgpr3, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.1, align 4, addrspace 5)
-  ; GFX9-FLATSCR-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.1, align 4, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -687,10 +687,10 @@ body:             |
   ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   $vgpr3 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (load (s96) from %stack.1, align 4, addrspace 5)
-  ; GFX10-FLATSCR-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.1, align 4, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -708,12 +708,12 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr3 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: (load (s32) from %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 4, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (load (s32) from %stack.1 + 8, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 8, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -758,10 +758,10 @@ body:             |
   ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; MUBUF-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -779,11 +779,11 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-  ; GFX9-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -801,10 +801,10 @@ body:             |
   ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-  ; GFX10-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -822,10 +822,10 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -870,11 +870,11 @@ body:             |
   ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; MUBUF-NEXT:   $vgpr2 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.1 + 4, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -892,11 +892,11 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_ADD_U32_e32 8200, $vgpr2, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.1, align 4, addrspace 5)
-  ; GFX9-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.1, align 4, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -914,10 +914,10 @@ body:             |
   ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   $vgpr2 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64) into %stack.1, align 4, addrspace 5)
-  ; GFX10-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.1, align 4, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -935,11 +935,11 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr2 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.1 + 4, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -984,12 +984,12 @@ body:             |
   ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; MUBUF-NEXT:   $vgpr3 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1 + 4, addrspace 5)
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr2, killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.1 + 8, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr2, killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 8, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -1007,11 +1007,11 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   $vgpr3 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr3 = V_ADD_U32_e32 8200, $vgpr3, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX3 $vgpr0_vgpr1_vgpr2, killed $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.1, align 4, addrspace 5)
-  ; GFX9-FLATSCR-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX3 $vgpr0_vgpr1_vgpr2, killed $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.1, align 4, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -1029,10 +1029,10 @@ body:             |
   ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   $vgpr3 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX3 $vgpr0_vgpr1_vgpr2, killed $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: (store (s96) into %stack.1, align 4, addrspace 5)
-  ; GFX10-FLATSCR-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX3 $vgpr0_vgpr1_vgpr2, killed $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.1, align 4, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -1050,12 +1050,12 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr3 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1 + 4, addrspace 5)
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr2, killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: (store (s32) into %stack.1 + 8, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr2, killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 8, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -1203,10 +1203,10 @@ body:             |
   ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; MUBUF-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -1224,13 +1224,13 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
   ; GFX9-FLATSCR-NEXT:   S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   $sgpr4 = S_BITSET0_B32 0, $sgpr4
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; GFX9-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -1248,13 +1248,13 @@ body:             |
   ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
   ; GFX10-FLATSCR-NEXT:   S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   $sgpr4 = S_BITSET0_B32 0, $sgpr4
   ; GFX10-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
   ; GFX10-FLATSCR-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; GFX10-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -1272,10 +1272,10 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
index edea344a66a3c..4fdd4bac39898 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
@@ -17,7 +17,7 @@ body:             |
     ; CHECK-LABEL: name: spill_v32
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: S_NOP 0, implicit $vgpr0
     SI_SPILL_V32_SAVE $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     S_NOP 0, implicit $vgpr0
@@ -39,7 +39,7 @@ body:             |
     ; CHECK-LABEL: name: spill_v32_kill
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
 ...
 
@@ -59,8 +59,8 @@ body:             |
     ; CHECK-LABEL: name: spill_v64
     ; CHECK: liveins: $vgpr0_vgpr1
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     ; CHECK-NEXT: S_NOP 0, implicit $vgpr0_vgpr1
     SI_SPILL_V64_SAVE $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     S_NOP 0, implicit $vgpr0_vgpr1
@@ -82,8 +82,8 @@ body:             |
     ; CHECK-LABEL: name: spill_v64_kill
     ; CHECK: liveins: $vgpr0_vgpr1
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
 ...
 
@@ -105,8 +105,8 @@ body:             |
     ; CHECK-LABEL: name: spill_v64_undef_sub1_killed
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
 ...
 
@@ -126,8 +126,8 @@ body:             |
     ; CHECK-LABEL: name: spill_v64_undef_sub0_killed
     ; CHECK: liveins: $vgpr1
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: (store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: (store (s32) into %stack.0 + 4, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
     SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
 ...
 
@@ -147,10 +147,10 @@ body:             |
     ; CHECK-LABEL: name: spill_v128_kill
     ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 4, addrspace 5)
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: (store (s32) into %stack.0 + 8, addrspace 5)
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: (store (s32) into %stack.0 + 12, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, addrspace 5)
 ...
 
diff --git a/llvm/test/CodeGen/AMDGPU/wait-xcnt-atomic-rmw-optimization.ll b/llvm/test/CodeGen/AMDGPU/wait-xcnt-atomic-rmw-optimization.ll
index e3adf737d6e5c..ca43fcc4d2cd0 100644
--- a/llvm/test/CodeGen/AMDGPU/wait-xcnt-atomic-rmw-optimization.ll
+++ b/llvm/test/CodeGen/AMDGPU/wait-xcnt-atomic-rmw-optimization.ll
@@ -891,7 +891,7 @@ define amdgpu_kernel void @atomic_rmw_across_basic_blocks(ptr addrspace(1) %ptr,
 ; GFX1250-NEXT:    v_writelane_b32 v2, s4, 0
 ; GFX1250-NEXT:    v_writelane_b32 v2, s5, 1
 ; GFX1250-NEXT:    s_or_saveexec_b32 s6, -1
-; GFX1250-NEXT:    scratch_store_b32 off, v2, off ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v2, off nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s6
 ; GFX1250-NEXT:    v_mov_b32_e32 v0, 0
@@ -921,7 +921,7 @@ define amdgpu_kernel void @atomic_rmw_across_basic_blocks(ptr addrspace(1) %ptr,
 ; GFX1250-NEXT:    s_cbranch_scc1 .LBB16_2
 ; GFX1250-NEXT:  ; %bb.1: ; %then
 ; GFX1250-NEXT:    s_or_saveexec_b32 s6, -1
-; GFX1250-NEXT:    scratch_load_b32 v2, off, off ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v2, off, off nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s6
 ; GFX1250-NEXT:    s_wait_loadcnt 0x0
@@ -973,13 +973,13 @@ define amdgpu_kernel void @atomic_rmw_in_loop(ptr addrspace(1) %ptr, i32 %n) {
 ; GFX1250-NEXT:    v_writelane_b32 v2, s1, 2
 ; GFX1250-NEXT:    v_writelane_b32 v2, s0, 3
 ; GFX1250-NEXT:    s_or_saveexec_b32 s6, -1
-; GFX1250-NEXT:    scratch_store_b32 off, v2, off ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v2, off nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s6
 ; GFX1250-NEXT:  .LBB17_1: ; %loop
 ; GFX1250-NEXT:    ; =>This Inner Loop Header: Depth=1
 ; GFX1250-NEXT:    s_or_saveexec_b32 s6, -1
-; GFX1250-NEXT:    scratch_load_b32 v2, off, off ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v2, off, off nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s6
 ; GFX1250-NEXT:    s_wait_loadcnt 0x0
@@ -1015,7 +1015,7 @@ define amdgpu_kernel void @atomic_rmw_in_loop(ptr addrspace(1) %ptr, i32 %n) {
 ; GFX1250-NEXT:    v_writelane_b32 v2, s0, 3
 ; GFX1250-NEXT:    s_mov_b32 s6, exec_lo
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, -1
-; GFX1250-NEXT:    scratch_store_b32 off, v2, off ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v2, off nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s6
 ; GFX1250-NEXT:    s_cbranch_scc1 .LBB17_1
@@ -1078,13 +1078,13 @@ define amdgpu_kernel void @atomic_rmw_with_branch(ptr addrspace(1) %ptr, i32 %co
 ; GFX1250-NEXT:    v_writelane_b32 v2, s0, 2
 ; GFX1250-NEXT:    s_mov_b32 s6, exec_lo
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, -1
-; GFX1250-NEXT:    scratch_store_b32 off, v2, off ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v2, off nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s6
 ; GFX1250-NEXT:    s_cbranch_scc1 .LBB18_3
 ; GFX1250-NEXT:  .LBB18_1: ; %Flow
 ; GFX1250-NEXT:    s_or_saveexec_b32 s6, -1
-; GFX1250-NEXT:    scratch_load_b32 v2, off, off ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v2, off, off nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s6
 ; GFX1250-NEXT:    s_wait_loadcnt 0x0
@@ -1096,7 +1096,7 @@ define amdgpu_kernel void @atomic_rmw_with_branch(ptr addrspace(1) %ptr, i32 %co
 ; GFX1250-NEXT:    s_cbranch_vccnz .LBB18_4
 ; GFX1250-NEXT:  ; %bb.2: ; %bb1
 ; GFX1250-NEXT:    s_or_saveexec_b32 s6, -1
-; GFX1250-NEXT:    scratch_load_b32 v2, off, off ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v2, off, off nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s6
 ; GFX1250-NEXT:    s_wait_loadcnt 0x0
@@ -1117,7 +1117,7 @@ define amdgpu_kernel void @atomic_rmw_with_branch(ptr addrspace(1) %ptr, i32 %co
 ; GFX1250-NEXT:    s_branch .LBB18_4
 ; GFX1250-NEXT:  .LBB18_3: ; %bb2
 ; GFX1250-NEXT:    s_or_saveexec_b32 s6, -1
-; GFX1250-NEXT:    scratch_load_b32 v2, off, off ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v2, off, off nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s6
 ; GFX1250-NEXT:    s_wait_loadcnt 0x0
@@ -1138,13 +1138,13 @@ define amdgpu_kernel void @atomic_rmw_with_branch(ptr addrspace(1) %ptr, i32 %co
 ; GFX1250-NEXT:    s_mov_b32 s0, 0
 ; GFX1250-NEXT:    v_writelane_b32 v2, s0, 2
 ; GFX1250-NEXT:    s_or_saveexec_b32 s6, -1
-; GFX1250-NEXT:    scratch_store_b32 off, v2, off ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v2, off nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s6
 ; GFX1250-NEXT:    s_branch .LBB18_1
 ; GFX1250-NEXT:  .LBB18_4: ; %merge
 ; GFX1250-NEXT:    s_or_saveexec_b32 s6, -1
-; GFX1250-NEXT:    scratch_load_b32 v2, off, off ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v2, off, off nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s6
 ; GFX1250-NEXT:    s_wait_loadcnt 0x0
@@ -1199,7 +1199,7 @@ define amdgpu_kernel void @atomic_rmw_fallthrough(ptr addrspace(1) %ptr) {
 ; GFX1250-NEXT:    v_writelane_b32 v2, s2, 0
 ; GFX1250-NEXT:    v_writelane_b32 v2, s3, 1
 ; GFX1250-NEXT:    s_or_saveexec_b32 s6, -1
-; GFX1250-NEXT:    scratch_store_b32 off, v2, off ; 4-byte Folded Spill
+; GFX1250-NEXT:    scratch_store_b32 off, v2, off nv ; 4-byte Folded Spill
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s6
 ; GFX1250-NEXT:    v_mov_b32_e32 v0, 0
@@ -1226,7 +1226,7 @@ define amdgpu_kernel void @atomic_rmw_fallthrough(ptr addrspace(1) %ptr) {
 ; GFX1250-NEXT:    s_wait_loadcnt 0x0
 ; GFX1250-NEXT:  ; %bb.1: ; %next
 ; GFX1250-NEXT:    s_or_saveexec_b32 s6, -1
-; GFX1250-NEXT:    scratch_load_b32 v2, off, off ; 4-byte Folded Reload
+; GFX1250-NEXT:    scratch_load_b32 v2, off, off nv ; 4-byte Folded Reload
 ; GFX1250-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-NEXT:    s_mov_b32 exec_lo, s6
 ; GFX1250-NEXT:    s_wait_loadcnt 0x0
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir b/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir
index adba762235d8c..405622c4bbf6d 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir
@@ -24,14 +24,14 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; CHECK-LABEL: name: save_inactive_lanes_non_csr_vgpr
-    ; CHECK: liveins: $vgpr0
+    ; CHECK: liveins: $vgpr0, $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1
     ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 14, implicit $exec
     ; CHECK-NEXT: $exec_lo = S_XOR_B32 $sgpr0, -1, implicit-def $scc
-    ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
     ; CHECK-NEXT: SI_RETURN implicit killed $vgpr0
     renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
@@ -65,9 +65,9 @@ body:             |
     ; CHECK: liveins: $vgpr40
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $sgpr0 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: $vgpr40 = V_MOV_B32_e32 14, implicit $exec
-    ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
     ; CHECK-NEXT: SI_RETURN
     renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
@@ -102,13 +102,13 @@ body:             |
     ; CHECK: liveins: $sgpr20, $vgpr191, $vgpr192
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vcc_lo = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr192, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr192, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1
     ; CHECK-NEXT: $vgpr192 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr192
     ; CHECK-NEXT: $sgpr20 = S_MOV_B32 14, implicit $exec
     ; CHECK-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr192, 0
     ; CHECK-NEXT: $exec_lo = S_XOR_B32 $vcc_lo, -1, implicit-def $scc
-    ; CHECK-NEXT: $vgpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo
     ; CHECK-NEXT: SI_RETURN
     $vgpr192 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr192
@@ -145,11 +145,11 @@ body:             |
     ; CHECK: liveins: $sgpr20, $vgpr191
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vcc_lo = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr191, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr191, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: $vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191
     ; CHECK-NEXT: $sgpr20 = S_MOV_B32 14, implicit $exec
     ; CHECK-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr191, 0
-    ; CHECK-NEXT: $vgpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo
     ; CHECK-NEXT: SI_RETURN
     $vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191
@@ -191,21 +191,21 @@ body:             |
     liveins: $sgpr20, $vgpr0, $vgpr1, $vgpr191
 
     ; CHECK-LABEL: name: vgpr_and_sgpr_csr
-    ; CHECK: liveins: $sgpr20, $vgpr0, $vgpr1, $vgpr40, $vgpr49
+    ; CHECK: liveins: $sgpr20, $vgpr0, $vgpr1, $vgpr40, $vgpr49, $vgpr49
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vcc_lo = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr49, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr49, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
     ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $sgpr20
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr49, implicit-def $sgpr40
     ; CHECK-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
-    ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
+    ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_XOR_B32 $vcc_lo, -1, implicit-def $scc
-    ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
-    ; CHECK-NEXT: $vgpr49 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr49 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo
     ; CHECK-NEXT: SI_RETURN
     $vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191
@@ -248,22 +248,22 @@ body:             |
     liveins: $sgpr20, $vgpr0, $vgpr1, $vgpr191
 
     ; CHECK-LABEL: name: split_orig_exec
-    ; CHECK: liveins: $sgpr20, $vgpr0, $vgpr1, $vgpr40, $vgpr49
+    ; CHECK: liveins: $sgpr20, $vgpr0, $vgpr1, $vgpr40, $vgpr49, $vgpr49
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vcc_lo = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr49, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr49, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
     ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $sgpr20
     ; CHECK-NEXT: $sgpr3 = COPY $vcc_lo
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr49, implicit-def $sgpr40
     ; CHECK-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
-    ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
+    ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_XOR_B32 $sgpr3, -1, implicit-def $scc
-    ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.0, addrspace 5)
-    ; CHECK-NEXT: $vgpr49 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr49 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr3
     ; CHECK-NEXT: SI_RETURN
     $vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191
@@ -298,29 +298,29 @@ machineFunctionInfo:
 body:             |
   bb.0:
     ; CHECK-LABEL: name: vgpr_superregs
-    ; CHECK: liveins: $vgpr0, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr40, $vgpr41, $vgpr42
+    ; CHECK: liveins: $vgpr0, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr0, $vgpr2, $vgpr3, $vgpr4, $vgpr5
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.2, addrspace 5)
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr4, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.3, addrspace 5)
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr4, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.5, addrspace 5)
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr41, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.6, addrspace 5)
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr42, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.7, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr41, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr42, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
     ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 14, implicit $exec
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr2_vgpr3_vgpr4_vgpr5, implicit-def $vgpr40_vgpr41_vgpr42
-    ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.5, addrspace 5)
-    ; CHECK-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.6, addrspace 5)
-    ; CHECK-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.7, addrspace 5)
+    ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
+    ; CHECK-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
+    ; CHECK-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_XOR_B32 $sgpr0, -1, implicit-def $scc
-    ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
-    ; CHECK-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
-    ; CHECK-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.2, addrspace 5)
-    ; CHECK-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.3, addrspace 5)
-    ; CHECK-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.4, addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; CHECK-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; CHECK-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; CHECK-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
     ; CHECK-NEXT: SI_RETURN implicit killed $vgpr0
     renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
@@ -396,11 +396,11 @@ body:             |
   ; CHECK-LABEL: name: multiple_blocks
   ; CHECK: bb.0:
   ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
-  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1
+  ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr0, $vgpr1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $vcc_lo = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
-  ; CHECK-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
+  ; CHECK-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+  ; CHECK-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
   ; CHECK-NEXT:   $exec_lo = S_MOV_B32 -1
   ; CHECK-NEXT:   $sgpr1 = S_MOV_B32 $exec_lo
   ; CHECK-NEXT:   V_CMPX_EQ_U32_nosdst_e64 $vgpr0, $vgpr1, implicit-def $exec, implicit $exec
@@ -419,8 +419,8 @@ body:             |
   ; CHECK-NEXT:   $exec_lo = S_OR_B32 $exec_lo, killed renamable $sgpr1, implicit-def $scc
   ; CHECK-NEXT:   renamable $vgpr0 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr0, $vcc_lo, implicit $exec
   ; CHECK-NEXT:   $exec_lo = S_XOR_B32 $vcc_lo, -1, implicit-def $scc
-  ; CHECK-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
-  ; CHECK-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (load (s32) from %stack.1, addrspace 5)
+  ; CHECK-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
   ; CHECK-NEXT:   $exec_lo = S_MOV_B32 $vcc_lo
   ; CHECK-NEXT:   SI_RETURN implicit $vgpr0
   bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
index c4f8e332a429c..37105efa3333c 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions.ll
@@ -114,8 +114,8 @@ define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) {
 ; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 vcc_lo, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x1 ; 8-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
 ; GFX1250-DAGISEL-NEXT:    v_dual_cndmask_b32 v0, 5, v0 :: v_dual_cndmask_b32 v1, 3, v1
@@ -123,8 +123,8 @@ define amdgpu_gfx_whole_wave i32 @basic_test(i1 %active, i32 %a, i32 %b) {
 ; GFX1250-DAGISEL-NEXT:    v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
 ; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, vcc_lo, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x1 ; 8-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, vcc_lo
 ; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
@@ -239,8 +239,8 @@ define amdgpu_gfx_whole_wave i32 @single_use_of_active(i1 %active, i32 %a, i32 %
 ; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 vcc_lo, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x1 ; 8-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
 ; GFX1250-DAGISEL-NEXT:    v_cndmask_b32_e32 v1, 17, v1, vcc_lo
@@ -248,8 +248,8 @@ define amdgpu_gfx_whole_wave i32 @single_use_of_active(i1 %active, i32 %a, i32 %
 ; GFX1250-DAGISEL-NEXT:    v_mov_b32_dpp v0, v1 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
 ; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, vcc_lo, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x1 ; 8-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, vcc_lo
 ; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
@@ -338,12 +338,12 @@ define amdgpu_gfx_whole_wave i32 @unused_active(i1 %active, i32 %a, i32 %b) {
 ; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 nv ; 4-byte Folded Spill
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
 ; GFX1250-DAGISEL-NEXT:    v_mov_b32_e32 v0, 14
 ; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, s0, -1
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32 nv ; 4-byte Folded Reload
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
@@ -518,13 +518,13 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) {
 ; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 vcc_lo, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3 ; 16-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2, s32
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 offset:4
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:8
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49, s32 offset:16
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2, s32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 offset:4 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:8 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49, s32 offset:16 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40, s32 offset:12 ; 4-byte Folded Spill
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40, s32 offset:12 nv ; 4-byte Folded Spill
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    ;;#ASMSTART
 ; GFX1250-DAGISEL-NEXT:    ; clobber CSR
@@ -533,7 +533,7 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) {
 ; GFX1250-DAGISEL-NEXT:    ;;#ASMSTART
 ; GFX1250-DAGISEL-NEXT:    ; clobber non-CSR
 ; GFX1250-DAGISEL-NEXT:    ;;#ASMEND
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40, off, s32 offset:12 ; 4-byte Folded Reload
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40, off, s32 offset:12 nv ; 4-byte Folded Reload
 ; GFX1250-DAGISEL-NEXT:    v_dual_cndmask_b32 v0, 5, v0 :: v_dual_cndmask_b32 v1, 3, v1
 ; GFX1250-DAGISEL-NEXT:    v_readlane_b32 s20, v2, 0
 ; GFX1250-DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_2)
@@ -541,10 +541,10 @@ define amdgpu_gfx_whole_wave i32 @csr(i1 %active, i32 %a, i32 %b) {
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, vcc_lo, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3 ; 16-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2, off, s32
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32 offset:4
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:8
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49, off, s32 offset:16
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2, off, s32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32 offset:4 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:8 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49, off, s32 offset:16 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, vcc_lo
 ; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
@@ -636,12 +636,12 @@ define amdgpu_gfx_whole_wave void @csr_vgpr_only(i1 %active, i32 %a, i32 %b) {
 ; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_or_saveexec_b32 s0, -1
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40, s32 ; 4-byte Folded Spill
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40, s32 nv ; 4-byte Folded Spill
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    ;;#ASMSTART
 ; GFX1250-DAGISEL-NEXT:    ; clobber CSR VGPR
 ; GFX1250-DAGISEL-NEXT:    ;;#ASMEND
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40, off, s32 ; 4-byte Folded Reload
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40, off, s32 nv ; 4-byte Folded Reload
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
@@ -748,7 +748,7 @@ define amdgpu_gfx_whole_wave void @sgpr_spill_only(i1 %active, i32 %a, i32 %b) {
 ; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt_dscnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 s0, -1
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 ; 4-byte Folded Spill
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 nv ; 4-byte Folded Spill
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
 ; GFX1250-DAGISEL-NEXT:    v_writelane_b32 v0, s68, 0
@@ -758,7 +758,7 @@ define amdgpu_gfx_whole_wave void @sgpr_spill_only(i1 %active, i32 %a, i32 %b) {
 ; GFX1250-DAGISEL-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 ; GFX1250-DAGISEL-NEXT:    v_readlane_b32 s68, v0, 0
 ; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, s0, -1
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32 ; 4-byte Folded Reload
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32 nv ; 4-byte Folded Reload
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
@@ -1024,8 +1024,8 @@ define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) {
 ; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 vcc_lo, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x1 ; 8-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
 ; GFX1250-DAGISEL-NEXT:    s_delay_alu instid0(SALU_CYCLE_1)
@@ -1039,8 +1039,8 @@ define amdgpu_gfx_whole_wave i32 @multiple_blocks(i1 %active, i32 %a, i32 %b) {
 ; GFX1250-DAGISEL-NEXT:    v_cndmask_b32_e32 v0, v1, v0, vcc_lo
 ; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, vcc_lo, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x1 ; 8-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, vcc_lo
 ; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
@@ -1189,10 +1189,10 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) {
 ; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 vcc_lo, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3 ; 16-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2, s32 offset:8
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3, s32 offset:12
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2, s32 offset:8 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3, s32 offset:12 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
 ; GFX1250-DAGISEL-NEXT:    v_dual_cndmask_b32 v1, 0, v1 :: v_dual_cndmask_b32 v0, 5, v0
@@ -1202,10 +1202,10 @@ define amdgpu_gfx_whole_wave i64 @ret_64(i1 %active, i64 %a, i64 %b) {
 ; GFX1250-DAGISEL-NEXT:    v_mov_b32_dpp v1, v3 quad_perm:[1,0,0,0] row_mask:0x1 bank_mask:0x1
 ; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, vcc_lo, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3 ; 16-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2, off, s32 offset:8
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3, off, s32 offset:12
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2, off, s32 offset:8 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3, off, s32 offset:12 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, vcc_lo
 ; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
@@ -1382,12 +1382,12 @@ define amdgpu_gfx_whole_wave void @inreg_args(i1 %active, i32 inreg %i32, <4 x i
 ; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 s0, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x5 ; 24-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2, s32 offset:8
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3, s32 offset:12
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4, s32 offset:16
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5, s32 offset:20
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2, s32 offset:8 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3, s32 offset:12 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4, s32 offset:16 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5, s32 offset:20 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
 ; GFX1250-DAGISEL-NEXT:    v_dual_mov_b32 v4, s4 :: v_dual_mov_b32 v5, s9
@@ -1400,12 +1400,12 @@ define amdgpu_gfx_whole_wave void @inreg_args(i1 %active, i32 inreg %i32, <4 x i
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, s0, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x5 ; 24-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2, off, s32 offset:8
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3, off, s32 offset:12
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4, off, s32 offset:16
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5, off, s32 offset:20
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2, off, s32 offset:8 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3, off, s32 offset:12 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4, off, s32 offset:16 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5, off, s32 offset:20 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX1250-DAGISEL-NEXT:    s_wait_loadcnt 0x0
@@ -2775,939 +2775,939 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 s33, s32
 ; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 s4, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s33 offset:4
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s33 offset:8
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2, s33 offset:12
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3, s33 offset:16
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4, s33 offset:20
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5, s33 offset:24
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6, s33 offset:28
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7, s33 offset:32
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8, s33 offset:36
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9, s33 offset:40
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10, s33 offset:44
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11, s33 offset:48
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12, s33 offset:52
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13, s33 offset:56
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14, s33 offset:60
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15, s33 offset:64
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16, s33 offset:68
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17, s33 offset:72
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18, s33 offset:76
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19, s33 offset:80
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20, s33 offset:84
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21, s33 offset:88
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22, s33 offset:92
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23, s33 offset:96
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24, s33 offset:100
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25, s33 offset:104
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26, s33 offset:108
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27, s33 offset:112
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28, s33 offset:116
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29, s33 offset:120
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30, s33 offset:124
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31, s33 offset:128
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32, s33 offset:132
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33, s33 offset:136
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34, s33 offset:140
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35, s33 offset:144
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36, s33 offset:148
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37, s33 offset:152
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38, s33 offset:156
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39, s33 offset:160
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48, s33 offset:164
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49, s33 offset:168
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50, s33 offset:172
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51, s33 offset:176
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52, s33 offset:180
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53, s33 offset:184
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54, s33 offset:188
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55, s33 offset:192
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64, s33 offset:196
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65, s33 offset:200
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66, s33 offset:204
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67, s33 offset:208
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68, s33 offset:212
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69, s33 offset:216
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70, s33 offset:220
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71, s33 offset:224
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80, s33 offset:228
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81, s33 offset:232
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82, s33 offset:236
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83, s33 offset:240
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84, s33 offset:244
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85, s33 offset:248
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86, s33 offset:252
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s33 offset:4 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s33 offset:8 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2, s33 offset:12 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3, s33 offset:16 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4, s33 offset:20 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5, s33 offset:24 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6, s33 offset:28 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7, s33 offset:32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8, s33 offset:36 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9, s33 offset:40 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10, s33 offset:44 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11, s33 offset:48 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12, s33 offset:52 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13, s33 offset:56 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14, s33 offset:60 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15, s33 offset:64 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16, s33 offset:68 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17, s33 offset:72 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18, s33 offset:76 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19, s33 offset:80 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20, s33 offset:84 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21, s33 offset:88 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22, s33 offset:92 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23, s33 offset:96 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24, s33 offset:100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25, s33 offset:104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26, s33 offset:108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27, s33 offset:112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28, s33 offset:116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29, s33 offset:120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30, s33 offset:124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31, s33 offset:128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32, s33 offset:132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33, s33 offset:136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34, s33 offset:140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35, s33 offset:144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36, s33 offset:148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37, s33 offset:152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38, s33 offset:156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39, s33 offset:160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48, s33 offset:164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49, s33 offset:168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50, s33 offset:172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51, s33 offset:176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52, s33 offset:180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53, s33 offset:184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54, s33 offset:188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55, s33 offset:192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64, s33 offset:196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65, s33 offset:200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66, s33 offset:204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67, s33 offset:208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68, s33 offset:212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69, s33 offset:216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70, s33 offset:220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71, s33 offset:224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80, s33 offset:228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81, s33 offset:232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82, s33 offset:236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83, s33 offset:240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84, s33 offset:244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85, s33 offset:248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86, s33 offset:252 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87, s33 offset:256
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96, s33 offset:260
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97, s33 offset:264
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98, s33 offset:268
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99, s33 offset:272
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100, s33 offset:276
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101, s33 offset:280
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102, s33 offset:284
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103, s33 offset:288
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112, s33 offset:292
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113, s33 offset:296
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114, s33 offset:300
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115, s33 offset:304
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116, s33 offset:308
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117, s33 offset:312
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118, s33 offset:316
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119, s33 offset:320
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128, s33 offset:324
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129, s33 offset:328
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130, s33 offset:332
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131, s33 offset:336
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132, s33 offset:340
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133, s33 offset:344
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134, s33 offset:348
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135, s33 offset:352
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144, s33 offset:356
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145, s33 offset:360
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146, s33 offset:364
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147, s33 offset:368
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148, s33 offset:372
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149, s33 offset:376
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150, s33 offset:380
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151, s33 offset:384
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160, s33 offset:388
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161, s33 offset:392
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162, s33 offset:396
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163, s33 offset:400
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164, s33 offset:404
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165, s33 offset:408
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166, s33 offset:412
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167, s33 offset:416
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176, s33 offset:420
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177, s33 offset:424
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178, s33 offset:428
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179, s33 offset:432
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180, s33 offset:436
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181, s33 offset:440
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182, s33 offset:444
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183, s33 offset:448
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192, s33 offset:452
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193, s33 offset:456
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194, s33 offset:460
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195, s33 offset:464
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196, s33 offset:468
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197, s33 offset:472
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198, s33 offset:476
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199, s33 offset:480
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208, s33 offset:484
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209, s33 offset:488
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210, s33 offset:492
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211, s33 offset:496
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212, s33 offset:500
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213, s33 offset:504
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87, s33 offset:256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96, s33 offset:260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97, s33 offset:264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98, s33 offset:268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99, s33 offset:272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100, s33 offset:276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101, s33 offset:280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102, s33 offset:284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103, s33 offset:288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112, s33 offset:292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113, s33 offset:296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114, s33 offset:300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115, s33 offset:304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116, s33 offset:308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117, s33 offset:312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118, s33 offset:316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119, s33 offset:320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128, s33 offset:324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129, s33 offset:328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130, s33 offset:332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131, s33 offset:336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132, s33 offset:340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133, s33 offset:344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134, s33 offset:348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135, s33 offset:352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144, s33 offset:356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145, s33 offset:360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146, s33 offset:364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147, s33 offset:368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148, s33 offset:372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149, s33 offset:376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150, s33 offset:380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151, s33 offset:384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160, s33 offset:388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161, s33 offset:392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162, s33 offset:396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163, s33 offset:400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164, s33 offset:404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165, s33 offset:408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166, s33 offset:412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167, s33 offset:416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176, s33 offset:420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177, s33 offset:424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178, s33 offset:428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179, s33 offset:432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180, s33 offset:436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181, s33 offset:440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182, s33 offset:444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183, s33 offset:448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192, s33 offset:452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193, s33 offset:456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194, s33 offset:460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195, s33 offset:464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196, s33 offset:468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197, s33 offset:472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198, s33 offset:476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199, s33 offset:480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208, s33 offset:484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209, s33 offset:488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210, s33 offset:492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211, s33 offset:496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212, s33 offset:500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213, s33 offset:504 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214, s33 offset:508
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215, s33 offset:512
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224, s33 offset:516
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225, s33 offset:520
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226, s33 offset:524
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227, s33 offset:528
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228, s33 offset:532
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229, s33 offset:536
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230, s33 offset:540
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231, s33 offset:544
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240, s33 offset:548
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241, s33 offset:552
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242, s33 offset:556
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243, s33 offset:560
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244, s33 offset:564
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245, s33 offset:568
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246, s33 offset:572
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247, s33 offset:576
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214, s33 offset:508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215, s33 offset:512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224, s33 offset:516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225, s33 offset:520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226, s33 offset:524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227, s33 offset:528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228, s33 offset:532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229, s33 offset:536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230, s33 offset:540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231, s33 offset:544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240, s33 offset:548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241, s33 offset:552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242, s33 offset:556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243, s33 offset:560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244, s33 offset:564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245, s33 offset:568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246, s33 offset:572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247, s33 offset:576 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 4 ; msbs: dst=0 src0=0 src1=1 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v256*/, s33 offset:580
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v257*/, s33 offset:584
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v258*/, s33 offset:588
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v259*/, s33 offset:592
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v260*/, s33 offset:596
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v261*/, s33 offset:600
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v262*/, s33 offset:604
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v263*/, s33 offset:608
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v264*/, s33 offset:612
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v265*/, s33 offset:616
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v266*/, s33 offset:620
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v267*/, s33 offset:624
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v268*/, s33 offset:628
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v269*/, s33 offset:632
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v270*/, s33 offset:636
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v271*/, s33 offset:640
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v272*/, s33 offset:644
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v273*/, s33 offset:648
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v274*/, s33 offset:652
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v275*/, s33 offset:656
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v276*/, s33 offset:660
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v277*/, s33 offset:664
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v278*/, s33 offset:668
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v279*/, s33 offset:672
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v280*/, s33 offset:676
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v281*/, s33 offset:680
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v282*/, s33 offset:684
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v283*/, s33 offset:688
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v284*/, s33 offset:692
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v285*/, s33 offset:696
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v286*/, s33 offset:700
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v287*/, s33 offset:704
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v288*/, s33 offset:708
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v289*/, s33 offset:712
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v290*/, s33 offset:716
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v291*/, s33 offset:720
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v292*/, s33 offset:724
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v293*/, s33 offset:728
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v294*/, s33 offset:732
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v295*/, s33 offset:736
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v296*/, s33 offset:740
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v297*/, s33 offset:744
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v298*/, s33 offset:748
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v299*/, s33 offset:752
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v300*/, s33 offset:756
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v256*/, s33 offset:580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v257*/, s33 offset:584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v258*/, s33 offset:588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v259*/, s33 offset:592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v260*/, s33 offset:596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v261*/, s33 offset:600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v262*/, s33 offset:604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v263*/, s33 offset:608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v264*/, s33 offset:612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v265*/, s33 offset:616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v266*/, s33 offset:620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v267*/, s33 offset:624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v268*/, s33 offset:628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v269*/, s33 offset:632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v270*/, s33 offset:636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v271*/, s33 offset:640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v272*/, s33 offset:644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v273*/, s33 offset:648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v274*/, s33 offset:652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v275*/, s33 offset:656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v276*/, s33 offset:660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v277*/, s33 offset:664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v278*/, s33 offset:668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v279*/, s33 offset:672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v280*/, s33 offset:676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v281*/, s33 offset:680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v282*/, s33 offset:684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v283*/, s33 offset:688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v284*/, s33 offset:692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v285*/, s33 offset:696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v286*/, s33 offset:700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v287*/, s33 offset:704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v288*/, s33 offset:708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v289*/, s33 offset:712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v290*/, s33 offset:716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v291*/, s33 offset:720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v292*/, s33 offset:724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v293*/, s33 offset:728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v294*/, s33 offset:732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v295*/, s33 offset:736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v296*/, s33 offset:740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v297*/, s33 offset:744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v298*/, s33 offset:748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v299*/, s33 offset:752 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v300*/, s33 offset:756 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v301*/, s33 offset:760
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v302*/, s33 offset:764
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v303*/, s33 offset:768
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v304*/, s33 offset:772
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v305*/, s33 offset:776
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v306*/, s33 offset:780
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v307*/, s33 offset:784
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v308*/, s33 offset:788
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v309*/, s33 offset:792
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v310*/, s33 offset:796
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v311*/, s33 offset:800
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v312*/, s33 offset:804
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v313*/, s33 offset:808
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v314*/, s33 offset:812
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v315*/, s33 offset:816
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v316*/, s33 offset:820
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v317*/, s33 offset:824
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v318*/, s33 offset:828
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v319*/, s33 offset:832
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v320*/, s33 offset:836
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v321*/, s33 offset:840
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v322*/, s33 offset:844
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v323*/, s33 offset:848
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v324*/, s33 offset:852
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v325*/, s33 offset:856
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v326*/, s33 offset:860
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v327*/, s33 offset:864
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v328*/, s33 offset:868
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v329*/, s33 offset:872
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v330*/, s33 offset:876
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v331*/, s33 offset:880
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v332*/, s33 offset:884
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v333*/, s33 offset:888
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v334*/, s33 offset:892
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v335*/, s33 offset:896
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v336*/, s33 offset:900
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v337*/, s33 offset:904
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v338*/, s33 offset:908
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v339*/, s33 offset:912
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v340*/, s33 offset:916
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v341*/, s33 offset:920
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v342*/, s33 offset:924
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v343*/, s33 offset:928
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v344*/, s33 offset:932
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v345*/, s33 offset:936
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v346*/, s33 offset:940
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v347*/, s33 offset:944
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v348*/, s33 offset:948
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v349*/, s33 offset:952
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v350*/, s33 offset:956
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v351*/, s33 offset:960
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v352*/, s33 offset:964
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v353*/, s33 offset:968
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v354*/, s33 offset:972
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v355*/, s33 offset:976
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v356*/, s33 offset:980
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v357*/, s33 offset:984
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v358*/, s33 offset:988
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v359*/, s33 offset:992
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v360*/, s33 offset:996
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v361*/, s33 offset:1000
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v362*/, s33 offset:1004
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v363*/, s33 offset:1008
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v301*/, s33 offset:760 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v302*/, s33 offset:764 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v303*/, s33 offset:768 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v304*/, s33 offset:772 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v305*/, s33 offset:776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v306*/, s33 offset:780 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v307*/, s33 offset:784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v308*/, s33 offset:788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v309*/, s33 offset:792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v310*/, s33 offset:796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v311*/, s33 offset:800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v312*/, s33 offset:804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v313*/, s33 offset:808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v314*/, s33 offset:812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v315*/, s33 offset:816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v316*/, s33 offset:820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v317*/, s33 offset:824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v318*/, s33 offset:828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v319*/, s33 offset:832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v320*/, s33 offset:836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v321*/, s33 offset:840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v322*/, s33 offset:844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v323*/, s33 offset:848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v324*/, s33 offset:852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v325*/, s33 offset:856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v326*/, s33 offset:860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v327*/, s33 offset:864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v328*/, s33 offset:868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v329*/, s33 offset:872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v330*/, s33 offset:876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v331*/, s33 offset:880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v332*/, s33 offset:884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v333*/, s33 offset:888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v334*/, s33 offset:892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v335*/, s33 offset:896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v336*/, s33 offset:900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v337*/, s33 offset:904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v338*/, s33 offset:908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v339*/, s33 offset:912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v340*/, s33 offset:916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v341*/, s33 offset:920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v342*/, s33 offset:924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v343*/, s33 offset:928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v344*/, s33 offset:932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v345*/, s33 offset:936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v346*/, s33 offset:940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v347*/, s33 offset:944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v348*/, s33 offset:948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v349*/, s33 offset:952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v350*/, s33 offset:956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v351*/, s33 offset:960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v352*/, s33 offset:964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v353*/, s33 offset:968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v354*/, s33 offset:972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v355*/, s33 offset:976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v356*/, s33 offset:980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v357*/, s33 offset:984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v358*/, s33 offset:988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v359*/, s33 offset:992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v360*/, s33 offset:996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v361*/, s33 offset:1000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v362*/, s33 offset:1004 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v363*/, s33 offset:1008 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v364*/, s33 offset:1012
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v365*/, s33 offset:1016
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v366*/, s33 offset:1020
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v367*/, s33 offset:1024
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v368*/, s33 offset:1028
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v369*/, s33 offset:1032
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v370*/, s33 offset:1036
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v371*/, s33 offset:1040
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v372*/, s33 offset:1044
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v373*/, s33 offset:1048
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v374*/, s33 offset:1052
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v375*/, s33 offset:1056
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v376*/, s33 offset:1060
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v377*/, s33 offset:1064
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v378*/, s33 offset:1068
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v379*/, s33 offset:1072
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v380*/, s33 offset:1076
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v381*/, s33 offset:1080
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v382*/, s33 offset:1084
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v383*/, s33 offset:1088
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v384*/, s33 offset:1092
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v385*/, s33 offset:1096
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v386*/, s33 offset:1100
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v387*/, s33 offset:1104
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v388*/, s33 offset:1108
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v389*/, s33 offset:1112
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v390*/, s33 offset:1116
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v391*/, s33 offset:1120
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v392*/, s33 offset:1124
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v393*/, s33 offset:1128
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v394*/, s33 offset:1132
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v395*/, s33 offset:1136
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v396*/, s33 offset:1140
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v397*/, s33 offset:1144
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v398*/, s33 offset:1148
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v399*/, s33 offset:1152
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v400*/, s33 offset:1156
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v401*/, s33 offset:1160
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v402*/, s33 offset:1164
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v403*/, s33 offset:1168
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v404*/, s33 offset:1172
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v405*/, s33 offset:1176
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v406*/, s33 offset:1180
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v407*/, s33 offset:1184
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v408*/, s33 offset:1188
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v409*/, s33 offset:1192
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v410*/, s33 offset:1196
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v411*/, s33 offset:1200
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v412*/, s33 offset:1204
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v413*/, s33 offset:1208
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v414*/, s33 offset:1212
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v415*/, s33 offset:1216
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v416*/, s33 offset:1220
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v417*/, s33 offset:1224
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v418*/, s33 offset:1228
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v419*/, s33 offset:1232
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v420*/, s33 offset:1236
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v421*/, s33 offset:1240
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v422*/, s33 offset:1244
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v423*/, s33 offset:1248
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v424*/, s33 offset:1252
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v425*/, s33 offset:1256
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v426*/, s33 offset:1260
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v364*/, s33 offset:1012 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v365*/, s33 offset:1016 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v366*/, s33 offset:1020 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v367*/, s33 offset:1024 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v368*/, s33 offset:1028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v369*/, s33 offset:1032 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v370*/, s33 offset:1036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v371*/, s33 offset:1040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v372*/, s33 offset:1044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v373*/, s33 offset:1048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v374*/, s33 offset:1052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v375*/, s33 offset:1056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v376*/, s33 offset:1060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v377*/, s33 offset:1064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v378*/, s33 offset:1068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v379*/, s33 offset:1072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v380*/, s33 offset:1076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v381*/, s33 offset:1080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v382*/, s33 offset:1084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v383*/, s33 offset:1088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v384*/, s33 offset:1092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v385*/, s33 offset:1096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v386*/, s33 offset:1100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v387*/, s33 offset:1104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v388*/, s33 offset:1108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v389*/, s33 offset:1112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v390*/, s33 offset:1116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v391*/, s33 offset:1120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v392*/, s33 offset:1124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v393*/, s33 offset:1128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v394*/, s33 offset:1132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v395*/, s33 offset:1136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v396*/, s33 offset:1140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v397*/, s33 offset:1144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v398*/, s33 offset:1148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v399*/, s33 offset:1152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v400*/, s33 offset:1156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v401*/, s33 offset:1160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v402*/, s33 offset:1164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v403*/, s33 offset:1168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v404*/, s33 offset:1172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v405*/, s33 offset:1176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v406*/, s33 offset:1180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v407*/, s33 offset:1184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v408*/, s33 offset:1188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v409*/, s33 offset:1192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v410*/, s33 offset:1196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v411*/, s33 offset:1200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v412*/, s33 offset:1204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v413*/, s33 offset:1208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v414*/, s33 offset:1212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v415*/, s33 offset:1216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v416*/, s33 offset:1220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v417*/, s33 offset:1224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v418*/, s33 offset:1228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v419*/, s33 offset:1232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v420*/, s33 offset:1236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v421*/, s33 offset:1240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v422*/, s33 offset:1244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v423*/, s33 offset:1248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v424*/, s33 offset:1252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v425*/, s33 offset:1256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v426*/, s33 offset:1260 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v427*/, s33 offset:1264
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v428*/, s33 offset:1268
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v429*/, s33 offset:1272
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v430*/, s33 offset:1276
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v431*/, s33 offset:1280
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v432*/, s33 offset:1284
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v433*/, s33 offset:1288
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v434*/, s33 offset:1292
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v435*/, s33 offset:1296
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v436*/, s33 offset:1300
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v437*/, s33 offset:1304
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v438*/, s33 offset:1308
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v439*/, s33 offset:1312
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v440*/, s33 offset:1316
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v441*/, s33 offset:1320
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v442*/, s33 offset:1324
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v443*/, s33 offset:1328
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v444*/, s33 offset:1332
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v445*/, s33 offset:1336
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v446*/, s33 offset:1340
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v447*/, s33 offset:1344
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v448*/, s33 offset:1348
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v449*/, s33 offset:1352
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v450*/, s33 offset:1356
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v451*/, s33 offset:1360
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v452*/, s33 offset:1364
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v453*/, s33 offset:1368
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v454*/, s33 offset:1372
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v455*/, s33 offset:1376
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v456*/, s33 offset:1380
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v457*/, s33 offset:1384
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v458*/, s33 offset:1388
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v459*/, s33 offset:1392
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v460*/, s33 offset:1396
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v461*/, s33 offset:1400
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v462*/, s33 offset:1404
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v463*/, s33 offset:1408
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v464*/, s33 offset:1412
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v465*/, s33 offset:1416
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v466*/, s33 offset:1420
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v467*/, s33 offset:1424
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v468*/, s33 offset:1428
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v469*/, s33 offset:1432
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v470*/, s33 offset:1436
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v471*/, s33 offset:1440
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v472*/, s33 offset:1444
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v473*/, s33 offset:1448
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v474*/, s33 offset:1452
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v475*/, s33 offset:1456
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v476*/, s33 offset:1460
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v477*/, s33 offset:1464
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v478*/, s33 offset:1468
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v479*/, s33 offset:1472
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v480*/, s33 offset:1476
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v481*/, s33 offset:1480
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v482*/, s33 offset:1484
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v483*/, s33 offset:1488
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v484*/, s33 offset:1492
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v485*/, s33 offset:1496
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v486*/, s33 offset:1500
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v487*/, s33 offset:1504
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v488*/, s33 offset:1508
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v489*/, s33 offset:1512
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v427*/, s33 offset:1264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v428*/, s33 offset:1268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v429*/, s33 offset:1272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v430*/, s33 offset:1276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v431*/, s33 offset:1280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v432*/, s33 offset:1284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v433*/, s33 offset:1288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v434*/, s33 offset:1292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v435*/, s33 offset:1296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v436*/, s33 offset:1300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v437*/, s33 offset:1304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v438*/, s33 offset:1308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v439*/, s33 offset:1312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v440*/, s33 offset:1316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v441*/, s33 offset:1320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v442*/, s33 offset:1324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v443*/, s33 offset:1328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v444*/, s33 offset:1332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v445*/, s33 offset:1336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v446*/, s33 offset:1340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v447*/, s33 offset:1344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v448*/, s33 offset:1348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v449*/, s33 offset:1352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v450*/, s33 offset:1356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v451*/, s33 offset:1360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v452*/, s33 offset:1364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v453*/, s33 offset:1368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v454*/, s33 offset:1372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v455*/, s33 offset:1376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v456*/, s33 offset:1380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v457*/, s33 offset:1384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v458*/, s33 offset:1388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v459*/, s33 offset:1392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v460*/, s33 offset:1396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v461*/, s33 offset:1400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v462*/, s33 offset:1404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v463*/, s33 offset:1408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v464*/, s33 offset:1412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v465*/, s33 offset:1416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v466*/, s33 offset:1420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v467*/, s33 offset:1424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v468*/, s33 offset:1428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v469*/, s33 offset:1432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v470*/, s33 offset:1436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v471*/, s33 offset:1440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v472*/, s33 offset:1444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v473*/, s33 offset:1448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v474*/, s33 offset:1452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v475*/, s33 offset:1456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v476*/, s33 offset:1460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v477*/, s33 offset:1464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v478*/, s33 offset:1468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v479*/, s33 offset:1472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v480*/, s33 offset:1476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v481*/, s33 offset:1480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v482*/, s33 offset:1484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v483*/, s33 offset:1488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v484*/, s33 offset:1492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v485*/, s33 offset:1496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v486*/, s33 offset:1500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v487*/, s33 offset:1504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v488*/, s33 offset:1508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v489*/, s33 offset:1512 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v490*/, s33 offset:1516
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v491*/, s33 offset:1520
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v492*/, s33 offset:1524
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v493*/, s33 offset:1528
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v494*/, s33 offset:1532
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v495*/, s33 offset:1536
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v496*/, s33 offset:1540
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v497*/, s33 offset:1544
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v498*/, s33 offset:1548
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v499*/, s33 offset:1552
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v500*/, s33 offset:1556
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v501*/, s33 offset:1560
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v502*/, s33 offset:1564
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v503*/, s33 offset:1568
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v504*/, s33 offset:1572
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v505*/, s33 offset:1576
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v506*/, s33 offset:1580
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v507*/, s33 offset:1584
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v508*/, s33 offset:1588
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v509*/, s33 offset:1592
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v510*/, s33 offset:1596
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v511*/, s33 offset:1600
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v490*/, s33 offset:1516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v491*/, s33 offset:1520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v492*/, s33 offset:1524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v493*/, s33 offset:1528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v494*/, s33 offset:1532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v495*/, s33 offset:1536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v496*/, s33 offset:1540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v497*/, s33 offset:1544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v498*/, s33 offset:1548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v499*/, s33 offset:1552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v500*/, s33 offset:1556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v501*/, s33 offset:1560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v502*/, s33 offset:1564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v503*/, s33 offset:1568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v504*/, s33 offset:1572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v505*/, s33 offset:1576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v506*/, s33 offset:1580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v507*/, s33 offset:1584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v508*/, s33 offset:1588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v509*/, s33 offset:1592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v510*/, s33 offset:1596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v511*/, s33 offset:1600 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x408 ; msbs: dst=0 src0=0 src1=2 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v512*/, s33 offset:1604
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v513*/, s33 offset:1608
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v514*/, s33 offset:1612
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v515*/, s33 offset:1616
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v516*/, s33 offset:1620
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v517*/, s33 offset:1624
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v518*/, s33 offset:1628
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v519*/, s33 offset:1632
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v520*/, s33 offset:1636
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v521*/, s33 offset:1640
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v522*/, s33 offset:1644
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v523*/, s33 offset:1648
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v524*/, s33 offset:1652
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v525*/, s33 offset:1656
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v526*/, s33 offset:1660
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v527*/, s33 offset:1664
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v528*/, s33 offset:1668
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v529*/, s33 offset:1672
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v530*/, s33 offset:1676
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v531*/, s33 offset:1680
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v532*/, s33 offset:1684
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v533*/, s33 offset:1688
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v534*/, s33 offset:1692
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v535*/, s33 offset:1696
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v536*/, s33 offset:1700
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v537*/, s33 offset:1704
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v538*/, s33 offset:1708
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v539*/, s33 offset:1712
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v540*/, s33 offset:1716
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v541*/, s33 offset:1720
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v542*/, s33 offset:1724
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v543*/, s33 offset:1728
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v544*/, s33 offset:1732
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v545*/, s33 offset:1736
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v546*/, s33 offset:1740
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v547*/, s33 offset:1744
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v548*/, s33 offset:1748
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v549*/, s33 offset:1752
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v550*/, s33 offset:1756
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v551*/, s33 offset:1760
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v552*/, s33 offset:1764
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v512*/, s33 offset:1604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v513*/, s33 offset:1608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v514*/, s33 offset:1612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v515*/, s33 offset:1616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v516*/, s33 offset:1620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v517*/, s33 offset:1624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v518*/, s33 offset:1628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v519*/, s33 offset:1632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v520*/, s33 offset:1636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v521*/, s33 offset:1640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v522*/, s33 offset:1644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v523*/, s33 offset:1648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v524*/, s33 offset:1652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v525*/, s33 offset:1656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v526*/, s33 offset:1660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v527*/, s33 offset:1664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v528*/, s33 offset:1668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v529*/, s33 offset:1672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v530*/, s33 offset:1676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v531*/, s33 offset:1680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v532*/, s33 offset:1684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v533*/, s33 offset:1688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v534*/, s33 offset:1692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v535*/, s33 offset:1696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v536*/, s33 offset:1700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v537*/, s33 offset:1704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v538*/, s33 offset:1708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v539*/, s33 offset:1712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v540*/, s33 offset:1716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v541*/, s33 offset:1720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v542*/, s33 offset:1724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v543*/, s33 offset:1728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v544*/, s33 offset:1732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v545*/, s33 offset:1736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v546*/, s33 offset:1740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v547*/, s33 offset:1744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v548*/, s33 offset:1748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v549*/, s33 offset:1752 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v550*/, s33 offset:1756 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v551*/, s33 offset:1760 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v552*/, s33 offset:1764 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v553*/, s33 offset:1768
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v554*/, s33 offset:1772
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v555*/, s33 offset:1776
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v556*/, s33 offset:1780
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v557*/, s33 offset:1784
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v558*/, s33 offset:1788
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v559*/, s33 offset:1792
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v560*/, s33 offset:1796
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v561*/, s33 offset:1800
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v562*/, s33 offset:1804
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v563*/, s33 offset:1808
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v564*/, s33 offset:1812
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v565*/, s33 offset:1816
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v566*/, s33 offset:1820
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v567*/, s33 offset:1824
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v568*/, s33 offset:1828
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v569*/, s33 offset:1832
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v570*/, s33 offset:1836
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v571*/, s33 offset:1840
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v572*/, s33 offset:1844
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v573*/, s33 offset:1848
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v574*/, s33 offset:1852
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v575*/, s33 offset:1856
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v576*/, s33 offset:1860
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v577*/, s33 offset:1864
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v578*/, s33 offset:1868
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v579*/, s33 offset:1872
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v580*/, s33 offset:1876
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v581*/, s33 offset:1880
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v582*/, s33 offset:1884
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v583*/, s33 offset:1888
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v584*/, s33 offset:1892
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v585*/, s33 offset:1896
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v586*/, s33 offset:1900
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v587*/, s33 offset:1904
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v588*/, s33 offset:1908
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v589*/, s33 offset:1912
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v590*/, s33 offset:1916
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v591*/, s33 offset:1920
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v592*/, s33 offset:1924
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v593*/, s33 offset:1928
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v594*/, s33 offset:1932
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v595*/, s33 offset:1936
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v596*/, s33 offset:1940
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v597*/, s33 offset:1944
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v598*/, s33 offset:1948
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v599*/, s33 offset:1952
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v600*/, s33 offset:1956
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v601*/, s33 offset:1960
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v602*/, s33 offset:1964
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v603*/, s33 offset:1968
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v604*/, s33 offset:1972
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v605*/, s33 offset:1976
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v606*/, s33 offset:1980
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v607*/, s33 offset:1984
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v608*/, s33 offset:1988
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v609*/, s33 offset:1992
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v610*/, s33 offset:1996
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v611*/, s33 offset:2000
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v612*/, s33 offset:2004
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v613*/, s33 offset:2008
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v614*/, s33 offset:2012
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v615*/, s33 offset:2016
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v553*/, s33 offset:1768 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v554*/, s33 offset:1772 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v555*/, s33 offset:1776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v556*/, s33 offset:1780 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v557*/, s33 offset:1784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v558*/, s33 offset:1788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v559*/, s33 offset:1792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v560*/, s33 offset:1796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v561*/, s33 offset:1800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v562*/, s33 offset:1804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v563*/, s33 offset:1808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v564*/, s33 offset:1812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v565*/, s33 offset:1816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v566*/, s33 offset:1820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v567*/, s33 offset:1824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v568*/, s33 offset:1828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v569*/, s33 offset:1832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v570*/, s33 offset:1836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v571*/, s33 offset:1840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v572*/, s33 offset:1844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v573*/, s33 offset:1848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v574*/, s33 offset:1852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v575*/, s33 offset:1856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v576*/, s33 offset:1860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v577*/, s33 offset:1864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v578*/, s33 offset:1868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v579*/, s33 offset:1872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v580*/, s33 offset:1876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v581*/, s33 offset:1880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v582*/, s33 offset:1884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v583*/, s33 offset:1888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v584*/, s33 offset:1892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v585*/, s33 offset:1896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v586*/, s33 offset:1900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v587*/, s33 offset:1904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v588*/, s33 offset:1908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v589*/, s33 offset:1912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v590*/, s33 offset:1916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v591*/, s33 offset:1920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v592*/, s33 offset:1924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v593*/, s33 offset:1928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v594*/, s33 offset:1932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v595*/, s33 offset:1936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v596*/, s33 offset:1940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v597*/, s33 offset:1944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v598*/, s33 offset:1948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v599*/, s33 offset:1952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v600*/, s33 offset:1956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v601*/, s33 offset:1960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v602*/, s33 offset:1964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v603*/, s33 offset:1968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v604*/, s33 offset:1972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v605*/, s33 offset:1976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v606*/, s33 offset:1980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v607*/, s33 offset:1984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v608*/, s33 offset:1988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v609*/, s33 offset:1992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v610*/, s33 offset:1996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v611*/, s33 offset:2000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v612*/, s33 offset:2004 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v613*/, s33 offset:2008 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v614*/, s33 offset:2012 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v615*/, s33 offset:2016 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v616*/, s33 offset:2020
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v617*/, s33 offset:2024
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v618*/, s33 offset:2028
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v619*/, s33 offset:2032
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v620*/, s33 offset:2036
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v621*/, s33 offset:2040
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v622*/, s33 offset:2044
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v623*/, s33 offset:2048
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v624*/, s33 offset:2052
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v625*/, s33 offset:2056
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v626*/, s33 offset:2060
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v627*/, s33 offset:2064
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v628*/, s33 offset:2068
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v629*/, s33 offset:2072
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v630*/, s33 offset:2076
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v631*/, s33 offset:2080
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v632*/, s33 offset:2084
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v633*/, s33 offset:2088
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v634*/, s33 offset:2092
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v635*/, s33 offset:2096
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v636*/, s33 offset:2100
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v637*/, s33 offset:2104
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v638*/, s33 offset:2108
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v639*/, s33 offset:2112
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v640*/, s33 offset:2116
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v641*/, s33 offset:2120
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v642*/, s33 offset:2124
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v643*/, s33 offset:2128
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v644*/, s33 offset:2132
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v645*/, s33 offset:2136
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v646*/, s33 offset:2140
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v647*/, s33 offset:2144
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v648*/, s33 offset:2148
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v649*/, s33 offset:2152
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v650*/, s33 offset:2156
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v651*/, s33 offset:2160
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v652*/, s33 offset:2164
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v653*/, s33 offset:2168
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v654*/, s33 offset:2172
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v655*/, s33 offset:2176
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v656*/, s33 offset:2180
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v657*/, s33 offset:2184
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v658*/, s33 offset:2188
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v659*/, s33 offset:2192
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v660*/, s33 offset:2196
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v661*/, s33 offset:2200
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v662*/, s33 offset:2204
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v663*/, s33 offset:2208
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v664*/, s33 offset:2212
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v665*/, s33 offset:2216
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v666*/, s33 offset:2220
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v667*/, s33 offset:2224
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v668*/, s33 offset:2228
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v669*/, s33 offset:2232
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v670*/, s33 offset:2236
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v671*/, s33 offset:2240
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v672*/, s33 offset:2244
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v673*/, s33 offset:2248
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v674*/, s33 offset:2252
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v675*/, s33 offset:2256
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v676*/, s33 offset:2260
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v677*/, s33 offset:2264
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v678*/, s33 offset:2268
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v616*/, s33 offset:2020 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v617*/, s33 offset:2024 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v618*/, s33 offset:2028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v619*/, s33 offset:2032 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v620*/, s33 offset:2036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v621*/, s33 offset:2040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v622*/, s33 offset:2044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v623*/, s33 offset:2048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v624*/, s33 offset:2052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v625*/, s33 offset:2056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v626*/, s33 offset:2060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v627*/, s33 offset:2064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v628*/, s33 offset:2068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v629*/, s33 offset:2072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v630*/, s33 offset:2076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v631*/, s33 offset:2080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v632*/, s33 offset:2084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v633*/, s33 offset:2088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v634*/, s33 offset:2092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v635*/, s33 offset:2096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v636*/, s33 offset:2100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v637*/, s33 offset:2104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v638*/, s33 offset:2108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v639*/, s33 offset:2112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v640*/, s33 offset:2116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v641*/, s33 offset:2120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v642*/, s33 offset:2124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v643*/, s33 offset:2128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v644*/, s33 offset:2132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v645*/, s33 offset:2136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v646*/, s33 offset:2140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v647*/, s33 offset:2144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v648*/, s33 offset:2148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v649*/, s33 offset:2152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v650*/, s33 offset:2156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v651*/, s33 offset:2160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v652*/, s33 offset:2164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v653*/, s33 offset:2168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v654*/, s33 offset:2172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v655*/, s33 offset:2176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v656*/, s33 offset:2180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v657*/, s33 offset:2184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v658*/, s33 offset:2188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v659*/, s33 offset:2192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v660*/, s33 offset:2196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v661*/, s33 offset:2200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v662*/, s33 offset:2204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v663*/, s33 offset:2208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v664*/, s33 offset:2212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v665*/, s33 offset:2216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v666*/, s33 offset:2220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v667*/, s33 offset:2224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v668*/, s33 offset:2228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v669*/, s33 offset:2232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v670*/, s33 offset:2236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v671*/, s33 offset:2240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v672*/, s33 offset:2244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v673*/, s33 offset:2248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v674*/, s33 offset:2252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v675*/, s33 offset:2256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v676*/, s33 offset:2260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v677*/, s33 offset:2264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v678*/, s33 offset:2268 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v679*/, s33 offset:2272
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v680*/, s33 offset:2276
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v681*/, s33 offset:2280
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v682*/, s33 offset:2284
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v683*/, s33 offset:2288
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v684*/, s33 offset:2292
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v685*/, s33 offset:2296
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v686*/, s33 offset:2300
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v687*/, s33 offset:2304
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v688*/, s33 offset:2308
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v689*/, s33 offset:2312
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v690*/, s33 offset:2316
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v691*/, s33 offset:2320
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v692*/, s33 offset:2324
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v693*/, s33 offset:2328
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v694*/, s33 offset:2332
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v695*/, s33 offset:2336
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v696*/, s33 offset:2340
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v697*/, s33 offset:2344
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v698*/, s33 offset:2348
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v699*/, s33 offset:2352
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v700*/, s33 offset:2356
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v701*/, s33 offset:2360
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v702*/, s33 offset:2364
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v703*/, s33 offset:2368
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v704*/, s33 offset:2372
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v705*/, s33 offset:2376
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v706*/, s33 offset:2380
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v707*/, s33 offset:2384
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v708*/, s33 offset:2388
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v709*/, s33 offset:2392
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v710*/, s33 offset:2396
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v711*/, s33 offset:2400
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v712*/, s33 offset:2404
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v713*/, s33 offset:2408
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v714*/, s33 offset:2412
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v715*/, s33 offset:2416
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v716*/, s33 offset:2420
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v717*/, s33 offset:2424
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v718*/, s33 offset:2428
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v719*/, s33 offset:2432
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v720*/, s33 offset:2436
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v721*/, s33 offset:2440
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v722*/, s33 offset:2444
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v723*/, s33 offset:2448
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v724*/, s33 offset:2452
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v725*/, s33 offset:2456
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v726*/, s33 offset:2460
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v727*/, s33 offset:2464
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v728*/, s33 offset:2468
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v729*/, s33 offset:2472
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v730*/, s33 offset:2476
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v731*/, s33 offset:2480
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v732*/, s33 offset:2484
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v733*/, s33 offset:2488
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v734*/, s33 offset:2492
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v735*/, s33 offset:2496
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v736*/, s33 offset:2500
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v737*/, s33 offset:2504
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v738*/, s33 offset:2508
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v739*/, s33 offset:2512
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v740*/, s33 offset:2516
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v741*/, s33 offset:2520
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v679*/, s33 offset:2272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v680*/, s33 offset:2276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v681*/, s33 offset:2280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v682*/, s33 offset:2284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v683*/, s33 offset:2288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v684*/, s33 offset:2292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v685*/, s33 offset:2296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v686*/, s33 offset:2300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v687*/, s33 offset:2304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v688*/, s33 offset:2308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v689*/, s33 offset:2312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v690*/, s33 offset:2316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v691*/, s33 offset:2320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v692*/, s33 offset:2324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v693*/, s33 offset:2328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v694*/, s33 offset:2332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v695*/, s33 offset:2336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v696*/, s33 offset:2340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v697*/, s33 offset:2344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v698*/, s33 offset:2348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v699*/, s33 offset:2352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v700*/, s33 offset:2356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v701*/, s33 offset:2360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v702*/, s33 offset:2364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v703*/, s33 offset:2368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v704*/, s33 offset:2372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v705*/, s33 offset:2376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v706*/, s33 offset:2380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v707*/, s33 offset:2384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v708*/, s33 offset:2388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v709*/, s33 offset:2392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v710*/, s33 offset:2396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v711*/, s33 offset:2400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v712*/, s33 offset:2404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v713*/, s33 offset:2408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v714*/, s33 offset:2412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v715*/, s33 offset:2416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v716*/, s33 offset:2420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v717*/, s33 offset:2424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v718*/, s33 offset:2428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v719*/, s33 offset:2432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v720*/, s33 offset:2436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v721*/, s33 offset:2440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v722*/, s33 offset:2444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v723*/, s33 offset:2448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v724*/, s33 offset:2452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v725*/, s33 offset:2456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v726*/, s33 offset:2460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v727*/, s33 offset:2464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v728*/, s33 offset:2468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v729*/, s33 offset:2472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v730*/, s33 offset:2476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v731*/, s33 offset:2480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v732*/, s33 offset:2484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v733*/, s33 offset:2488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v734*/, s33 offset:2492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v735*/, s33 offset:2496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v736*/, s33 offset:2500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v737*/, s33 offset:2504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v738*/, s33 offset:2508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v739*/, s33 offset:2512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v740*/, s33 offset:2516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v741*/, s33 offset:2520 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v742*/, s33 offset:2524
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v743*/, s33 offset:2528
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v744*/, s33 offset:2532
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v745*/, s33 offset:2536
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v746*/, s33 offset:2540
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v747*/, s33 offset:2544
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v748*/, s33 offset:2548
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v749*/, s33 offset:2552
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v750*/, s33 offset:2556
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v751*/, s33 offset:2560
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v752*/, s33 offset:2564
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v753*/, s33 offset:2568
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v754*/, s33 offset:2572
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v755*/, s33 offset:2576
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v756*/, s33 offset:2580
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v757*/, s33 offset:2584
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v758*/, s33 offset:2588
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v759*/, s33 offset:2592
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v760*/, s33 offset:2596
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v761*/, s33 offset:2600
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v762*/, s33 offset:2604
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v763*/, s33 offset:2608
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v764*/, s33 offset:2612
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v765*/, s33 offset:2616
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v766*/, s33 offset:2620
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v767*/, s33 offset:2624
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v742*/, s33 offset:2524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v743*/, s33 offset:2528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v744*/, s33 offset:2532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v745*/, s33 offset:2536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v746*/, s33 offset:2540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v747*/, s33 offset:2544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v748*/, s33 offset:2548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v749*/, s33 offset:2552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v750*/, s33 offset:2556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v751*/, s33 offset:2560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v752*/, s33 offset:2564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v753*/, s33 offset:2568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v754*/, s33 offset:2572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v755*/, s33 offset:2576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v756*/, s33 offset:2580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v757*/, s33 offset:2584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v758*/, s33 offset:2588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v759*/, s33 offset:2592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v760*/, s33 offset:2596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v761*/, s33 offset:2600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v762*/, s33 offset:2604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v763*/, s33 offset:2608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v764*/, s33 offset:2612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v765*/, s33 offset:2616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v766*/, s33 offset:2620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v767*/, s33 offset:2624 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x80c ; msbs: dst=0 src0=0 src1=3 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v768*/, s33 offset:2628
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v769*/, s33 offset:2632
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v770*/, s33 offset:2636
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v771*/, s33 offset:2640
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v772*/, s33 offset:2644
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v773*/, s33 offset:2648
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v774*/, s33 offset:2652
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v775*/, s33 offset:2656
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v776*/, s33 offset:2660
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v777*/, s33 offset:2664
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v778*/, s33 offset:2668
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v779*/, s33 offset:2672
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v780*/, s33 offset:2676
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v781*/, s33 offset:2680
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v782*/, s33 offset:2684
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v783*/, s33 offset:2688
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v784*/, s33 offset:2692
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v785*/, s33 offset:2696
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v786*/, s33 offset:2700
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v787*/, s33 offset:2704
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v788*/, s33 offset:2708
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v789*/, s33 offset:2712
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v790*/, s33 offset:2716
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v791*/, s33 offset:2720
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v792*/, s33 offset:2724
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v793*/, s33 offset:2728
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v794*/, s33 offset:2732
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v795*/, s33 offset:2736
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v796*/, s33 offset:2740
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v797*/, s33 offset:2744
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v798*/, s33 offset:2748
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v799*/, s33 offset:2752
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v800*/, s33 offset:2756
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v801*/, s33 offset:2760
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v802*/, s33 offset:2764
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v803*/, s33 offset:2768
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v804*/, s33 offset:2772
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v768*/, s33 offset:2628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v769*/, s33 offset:2632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v770*/, s33 offset:2636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v771*/, s33 offset:2640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v772*/, s33 offset:2644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v773*/, s33 offset:2648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v774*/, s33 offset:2652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v775*/, s33 offset:2656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v776*/, s33 offset:2660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v777*/, s33 offset:2664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v778*/, s33 offset:2668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v779*/, s33 offset:2672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v780*/, s33 offset:2676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v781*/, s33 offset:2680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v782*/, s33 offset:2684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v783*/, s33 offset:2688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v784*/, s33 offset:2692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v785*/, s33 offset:2696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v786*/, s33 offset:2700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v787*/, s33 offset:2704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v788*/, s33 offset:2708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v789*/, s33 offset:2712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v790*/, s33 offset:2716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v791*/, s33 offset:2720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v792*/, s33 offset:2724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v793*/, s33 offset:2728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v794*/, s33 offset:2732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v795*/, s33 offset:2736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v796*/, s33 offset:2740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v797*/, s33 offset:2744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v798*/, s33 offset:2748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v799*/, s33 offset:2752 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v800*/, s33 offset:2756 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v801*/, s33 offset:2760 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v802*/, s33 offset:2764 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v803*/, s33 offset:2768 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v804*/, s33 offset:2772 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v805*/, s33 offset:2776
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v806*/, s33 offset:2780
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v807*/, s33 offset:2784
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v808*/, s33 offset:2788
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v809*/, s33 offset:2792
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v810*/, s33 offset:2796
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v811*/, s33 offset:2800
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v812*/, s33 offset:2804
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v813*/, s33 offset:2808
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v814*/, s33 offset:2812
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v815*/, s33 offset:2816
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v816*/, s33 offset:2820
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v817*/, s33 offset:2824
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v818*/, s33 offset:2828
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v819*/, s33 offset:2832
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v820*/, s33 offset:2836
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v821*/, s33 offset:2840
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v822*/, s33 offset:2844
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v823*/, s33 offset:2848
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v824*/, s33 offset:2852
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v825*/, s33 offset:2856
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v826*/, s33 offset:2860
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v827*/, s33 offset:2864
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v828*/, s33 offset:2868
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v829*/, s33 offset:2872
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v830*/, s33 offset:2876
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v831*/, s33 offset:2880
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v832*/, s33 offset:2884
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v833*/, s33 offset:2888
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v834*/, s33 offset:2892
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v835*/, s33 offset:2896
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v836*/, s33 offset:2900
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v837*/, s33 offset:2904
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v838*/, s33 offset:2908
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v839*/, s33 offset:2912
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v840*/, s33 offset:2916
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v841*/, s33 offset:2920
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v842*/, s33 offset:2924
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v843*/, s33 offset:2928
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v844*/, s33 offset:2932
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v845*/, s33 offset:2936
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v846*/, s33 offset:2940
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v847*/, s33 offset:2944
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v848*/, s33 offset:2948
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v849*/, s33 offset:2952
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v850*/, s33 offset:2956
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v851*/, s33 offset:2960
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v852*/, s33 offset:2964
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v853*/, s33 offset:2968
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v854*/, s33 offset:2972
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v855*/, s33 offset:2976
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v856*/, s33 offset:2980
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v857*/, s33 offset:2984
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v858*/, s33 offset:2988
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v859*/, s33 offset:2992
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v860*/, s33 offset:2996
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v861*/, s33 offset:3000
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v862*/, s33 offset:3004
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v863*/, s33 offset:3008
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v864*/, s33 offset:3012
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v865*/, s33 offset:3016
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v866*/, s33 offset:3020
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v867*/, s33 offset:3024
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v805*/, s33 offset:2776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v806*/, s33 offset:2780 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v807*/, s33 offset:2784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v808*/, s33 offset:2788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v809*/, s33 offset:2792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v810*/, s33 offset:2796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v811*/, s33 offset:2800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v812*/, s33 offset:2804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v813*/, s33 offset:2808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v814*/, s33 offset:2812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v815*/, s33 offset:2816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v816*/, s33 offset:2820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v817*/, s33 offset:2824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v818*/, s33 offset:2828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v819*/, s33 offset:2832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v820*/, s33 offset:2836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v821*/, s33 offset:2840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v822*/, s33 offset:2844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v823*/, s33 offset:2848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v824*/, s33 offset:2852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v825*/, s33 offset:2856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v826*/, s33 offset:2860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v827*/, s33 offset:2864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v828*/, s33 offset:2868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v829*/, s33 offset:2872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v830*/, s33 offset:2876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v831*/, s33 offset:2880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v832*/, s33 offset:2884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v833*/, s33 offset:2888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v834*/, s33 offset:2892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v835*/, s33 offset:2896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v836*/, s33 offset:2900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v837*/, s33 offset:2904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v838*/, s33 offset:2908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v839*/, s33 offset:2912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v840*/, s33 offset:2916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v841*/, s33 offset:2920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v842*/, s33 offset:2924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v843*/, s33 offset:2928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v844*/, s33 offset:2932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v845*/, s33 offset:2936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v846*/, s33 offset:2940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v847*/, s33 offset:2944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v848*/, s33 offset:2948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v849*/, s33 offset:2952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v850*/, s33 offset:2956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v851*/, s33 offset:2960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v852*/, s33 offset:2964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v853*/, s33 offset:2968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v854*/, s33 offset:2972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v855*/, s33 offset:2976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v856*/, s33 offset:2980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v857*/, s33 offset:2984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v858*/, s33 offset:2988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v859*/, s33 offset:2992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v860*/, s33 offset:2996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v861*/, s33 offset:3000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v862*/, s33 offset:3004 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v863*/, s33 offset:3008 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v864*/, s33 offset:3012 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v865*/, s33 offset:3016 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v866*/, s33 offset:3020 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v867*/, s33 offset:3024 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v868*/, s33 offset:3028
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v869*/, s33 offset:3032
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v870*/, s33 offset:3036
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v871*/, s33 offset:3040
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v872*/, s33 offset:3044
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v873*/, s33 offset:3048
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v874*/, s33 offset:3052
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v875*/, s33 offset:3056
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v876*/, s33 offset:3060
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v877*/, s33 offset:3064
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v878*/, s33 offset:3068
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v879*/, s33 offset:3072
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v880*/, s33 offset:3076
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v881*/, s33 offset:3080
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v882*/, s33 offset:3084
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v883*/, s33 offset:3088
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v884*/, s33 offset:3092
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v885*/, s33 offset:3096
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v886*/, s33 offset:3100
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v887*/, s33 offset:3104
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v888*/, s33 offset:3108
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v889*/, s33 offset:3112
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v890*/, s33 offset:3116
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v891*/, s33 offset:3120
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v892*/, s33 offset:3124
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v893*/, s33 offset:3128
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v894*/, s33 offset:3132
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v895*/, s33 offset:3136
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v896*/, s33 offset:3140
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v897*/, s33 offset:3144
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v898*/, s33 offset:3148
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v899*/, s33 offset:3152
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v900*/, s33 offset:3156
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v901*/, s33 offset:3160
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v902*/, s33 offset:3164
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v903*/, s33 offset:3168
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v904*/, s33 offset:3172
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v905*/, s33 offset:3176
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v906*/, s33 offset:3180
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v907*/, s33 offset:3184
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v908*/, s33 offset:3188
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v909*/, s33 offset:3192
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v910*/, s33 offset:3196
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v911*/, s33 offset:3200
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v912*/, s33 offset:3204
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v913*/, s33 offset:3208
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v914*/, s33 offset:3212
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v915*/, s33 offset:3216
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v916*/, s33 offset:3220
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v917*/, s33 offset:3224
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v918*/, s33 offset:3228
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v919*/, s33 offset:3232
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v920*/, s33 offset:3236
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v921*/, s33 offset:3240
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v922*/, s33 offset:3244
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v923*/, s33 offset:3248
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v924*/, s33 offset:3252
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v925*/, s33 offset:3256
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v926*/, s33 offset:3260
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v927*/, s33 offset:3264
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v928*/, s33 offset:3268
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v929*/, s33 offset:3272
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v930*/, s33 offset:3276
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v868*/, s33 offset:3028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v869*/, s33 offset:3032 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v870*/, s33 offset:3036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v871*/, s33 offset:3040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v872*/, s33 offset:3044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v873*/, s33 offset:3048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v874*/, s33 offset:3052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v875*/, s33 offset:3056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v876*/, s33 offset:3060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v877*/, s33 offset:3064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v878*/, s33 offset:3068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v879*/, s33 offset:3072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v880*/, s33 offset:3076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v881*/, s33 offset:3080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v882*/, s33 offset:3084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v883*/, s33 offset:3088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v884*/, s33 offset:3092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v885*/, s33 offset:3096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v886*/, s33 offset:3100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v887*/, s33 offset:3104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v888*/, s33 offset:3108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v889*/, s33 offset:3112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v890*/, s33 offset:3116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v891*/, s33 offset:3120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v892*/, s33 offset:3124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v893*/, s33 offset:3128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v894*/, s33 offset:3132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v895*/, s33 offset:3136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v896*/, s33 offset:3140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v897*/, s33 offset:3144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v898*/, s33 offset:3148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v899*/, s33 offset:3152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v900*/, s33 offset:3156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v901*/, s33 offset:3160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v902*/, s33 offset:3164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v903*/, s33 offset:3168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v904*/, s33 offset:3172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v905*/, s33 offset:3176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v906*/, s33 offset:3180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v907*/, s33 offset:3184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v908*/, s33 offset:3188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v909*/, s33 offset:3192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v910*/, s33 offset:3196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v911*/, s33 offset:3200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v912*/, s33 offset:3204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v913*/, s33 offset:3208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v914*/, s33 offset:3212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v915*/, s33 offset:3216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v916*/, s33 offset:3220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v917*/, s33 offset:3224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v918*/, s33 offset:3228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v919*/, s33 offset:3232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v920*/, s33 offset:3236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v921*/, s33 offset:3240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v922*/, s33 offset:3244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v923*/, s33 offset:3248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v924*/, s33 offset:3252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v925*/, s33 offset:3256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v926*/, s33 offset:3260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v927*/, s33 offset:3264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v928*/, s33 offset:3268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v929*/, s33 offset:3272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v930*/, s33 offset:3276 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v931*/, s33 offset:3280
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v932*/, s33 offset:3284
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v933*/, s33 offset:3288
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v934*/, s33 offset:3292
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v935*/, s33 offset:3296
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v936*/, s33 offset:3300
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v937*/, s33 offset:3304
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v938*/, s33 offset:3308
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v939*/, s33 offset:3312
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v940*/, s33 offset:3316
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v941*/, s33 offset:3320
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v942*/, s33 offset:3324
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v943*/, s33 offset:3328
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v944*/, s33 offset:3332
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v945*/, s33 offset:3336
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v946*/, s33 offset:3340
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v947*/, s33 offset:3344
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v948*/, s33 offset:3348
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v949*/, s33 offset:3352
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v950*/, s33 offset:3356
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v951*/, s33 offset:3360
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v952*/, s33 offset:3364
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v953*/, s33 offset:3368
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v954*/, s33 offset:3372
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v955*/, s33 offset:3376
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v956*/, s33 offset:3380
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v957*/, s33 offset:3384
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v958*/, s33 offset:3388
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v959*/, s33 offset:3392
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v960*/, s33 offset:3396
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v961*/, s33 offset:3400
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v962*/, s33 offset:3404
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v963*/, s33 offset:3408
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v964*/, s33 offset:3412
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v965*/, s33 offset:3416
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v966*/, s33 offset:3420
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v967*/, s33 offset:3424
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v968*/, s33 offset:3428
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v969*/, s33 offset:3432
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v970*/, s33 offset:3436
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v971*/, s33 offset:3440
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v972*/, s33 offset:3444
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v973*/, s33 offset:3448
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v974*/, s33 offset:3452
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v975*/, s33 offset:3456
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v976*/, s33 offset:3460
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v977*/, s33 offset:3464
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v978*/, s33 offset:3468
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v979*/, s33 offset:3472
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v980*/, s33 offset:3476
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v981*/, s33 offset:3480
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v982*/, s33 offset:3484
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v983*/, s33 offset:3488
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v984*/, s33 offset:3492
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v985*/, s33 offset:3496
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v986*/, s33 offset:3500
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v987*/, s33 offset:3504
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v988*/, s33 offset:3508
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v989*/, s33 offset:3512
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v990*/, s33 offset:3516
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v991*/, s33 offset:3520
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v992*/, s33 offset:3524
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v993*/, s33 offset:3528
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v931*/, s33 offset:3280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v932*/, s33 offset:3284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v933*/, s33 offset:3288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v934*/, s33 offset:3292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v935*/, s33 offset:3296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v936*/, s33 offset:3300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v937*/, s33 offset:3304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v938*/, s33 offset:3308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v939*/, s33 offset:3312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v940*/, s33 offset:3316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v941*/, s33 offset:3320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v942*/, s33 offset:3324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v943*/, s33 offset:3328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v944*/, s33 offset:3332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v945*/, s33 offset:3336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v946*/, s33 offset:3340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v947*/, s33 offset:3344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v948*/, s33 offset:3348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v949*/, s33 offset:3352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v950*/, s33 offset:3356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v951*/, s33 offset:3360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v952*/, s33 offset:3364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v953*/, s33 offset:3368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v954*/, s33 offset:3372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v955*/, s33 offset:3376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v956*/, s33 offset:3380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v957*/, s33 offset:3384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v958*/, s33 offset:3388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v959*/, s33 offset:3392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v960*/, s33 offset:3396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v961*/, s33 offset:3400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v962*/, s33 offset:3404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v963*/, s33 offset:3408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v964*/, s33 offset:3412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v965*/, s33 offset:3416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v966*/, s33 offset:3420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v967*/, s33 offset:3424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v968*/, s33 offset:3428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v969*/, s33 offset:3432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v970*/, s33 offset:3436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v971*/, s33 offset:3440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v972*/, s33 offset:3444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v973*/, s33 offset:3448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v974*/, s33 offset:3452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v975*/, s33 offset:3456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v976*/, s33 offset:3460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v977*/, s33 offset:3464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v978*/, s33 offset:3468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v979*/, s33 offset:3472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v980*/, s33 offset:3476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v981*/, s33 offset:3480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v982*/, s33 offset:3484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v983*/, s33 offset:3488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v984*/, s33 offset:3492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v985*/, s33 offset:3496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v986*/, s33 offset:3500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v987*/, s33 offset:3504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v988*/, s33 offset:3508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v989*/, s33 offset:3512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v990*/, s33 offset:3516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v991*/, s33 offset:3520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v992*/, s33 offset:3524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v993*/, s33 offset:3528 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x1d ; 120-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v994*/, s33 offset:3532
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v995*/, s33 offset:3536
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v996*/, s33 offset:3540
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v997*/, s33 offset:3544
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v998*/, s33 offset:3548
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v999*/, s33 offset:3552
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v1000*/, s33 offset:3556
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v1001*/, s33 offset:3560
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v1002*/, s33 offset:3564
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v1003*/, s33 offset:3568
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v1004*/, s33 offset:3572
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v1005*/, s33 offset:3576
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v1006*/, s33 offset:3580
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v1007*/, s33 offset:3584
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v1008*/, s33 offset:3588
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v1009*/, s33 offset:3592
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v1010*/, s33 offset:3596
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v1011*/, s33 offset:3600
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v1012*/, s33 offset:3604
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v1013*/, s33 offset:3608
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v1014*/, s33 offset:3612
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v1015*/, s33 offset:3616
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v1016*/, s33 offset:3620
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v1017*/, s33 offset:3624
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v1018*/, s33 offset:3628
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v1019*/, s33 offset:3632
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v1020*/, s33 offset:3636
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v1021*/, s33 offset:3640
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v1022*/, s33 offset:3644
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v1023*/, s33 offset:3648
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v994*/, s33 offset:3532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v995*/, s33 offset:3536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v996*/, s33 offset:3540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v997*/, s33 offset:3544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v998*/, s33 offset:3548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v999*/, s33 offset:3552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v1000*/, s33 offset:3556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v1001*/, s33 offset:3560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v1002*/, s33 offset:3564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v1003*/, s33 offset:3568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v1004*/, s33 offset:3572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v1005*/, s33 offset:3576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v1006*/, s33 offset:3580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v1007*/, s33 offset:3584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v1008*/, s33 offset:3588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v1009*/, s33 offset:3592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v1010*/, s33 offset:3596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v1011*/, s33 offset:3600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v1012*/, s33 offset:3604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v1013*/, s33 offset:3608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v1014*/, s33 offset:3612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v1015*/, s33 offset:3616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v1016*/, s33 offset:3620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v1017*/, s33 offset:3624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v1018*/, s33 offset:3628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v1019*/, s33 offset:3632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v1020*/, s33 offset:3636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v1021*/, s33 offset:3640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v1022*/, s33 offset:3644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v1023*/, s33 offset:3648 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0xc00 ; msbs: dst=0 src0=0 src1=0 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40, s33 ; 4-byte Folded Spill
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40, s33 nv ; 4-byte Folded Spill
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    v_writelane_b32 v40, s0, 3
 ; GFX1250-DAGISEL-NEXT:    v_mov_b32_e32 v2, v0
@@ -3723,940 +3723,940 @@ define amdgpu_gfx_whole_wave <2 x half> @call_gfx_from_whole_wave(i1 %active, <2
 ; GFX1250-DAGISEL-NEXT:    v_readlane_b32 s30, v40, 1
 ; GFX1250-DAGISEL-NEXT:    v_readlane_b32 s4, v40, 0
 ; GFX1250-DAGISEL-NEXT:    v_readlane_b32 s0, v40, 3
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40, off, s33 ; 4-byte Folded Reload
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40, off, s33 nv ; 4-byte Folded Reload
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 s32, s33
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, s4, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s33 offset:4
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s33 offset:8
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2, off, s33 offset:12
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3, off, s33 offset:16
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4, off, s33 offset:20
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5, off, s33 offset:24
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6, off, s33 offset:28
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7, off, s33 offset:32
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8, off, s33 offset:36
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9, off, s33 offset:40
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10, off, s33 offset:44
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11, off, s33 offset:48
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12, off, s33 offset:52
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13, off, s33 offset:56
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14, off, s33 offset:60
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15, off, s33 offset:64
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16, off, s33 offset:68
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17, off, s33 offset:72
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18, off, s33 offset:76
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19, off, s33 offset:80
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20, off, s33 offset:84
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21, off, s33 offset:88
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22, off, s33 offset:92
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23, off, s33 offset:96
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24, off, s33 offset:100
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25, off, s33 offset:104
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26, off, s33 offset:108
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27, off, s33 offset:112
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28, off, s33 offset:116
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29, off, s33 offset:120
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30, off, s33 offset:124
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31, off, s33 offset:128
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32, off, s33 offset:132
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33, off, s33 offset:136
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34, off, s33 offset:140
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35, off, s33 offset:144
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36, off, s33 offset:148
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37, off, s33 offset:152
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38, off, s33 offset:156
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39, off, s33 offset:160
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48, off, s33 offset:164
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49, off, s33 offset:168
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50, off, s33 offset:172
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51, off, s33 offset:176
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52, off, s33 offset:180
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53, off, s33 offset:184
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54, off, s33 offset:188
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55, off, s33 offset:192
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64, off, s33 offset:196
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65, off, s33 offset:200
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66, off, s33 offset:204
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67, off, s33 offset:208
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68, off, s33 offset:212
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69, off, s33 offset:216
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70, off, s33 offset:220
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71, off, s33 offset:224
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80, off, s33 offset:228
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81, off, s33 offset:232
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82, off, s33 offset:236
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83, off, s33 offset:240
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84, off, s33 offset:244
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85, off, s33 offset:248
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86, off, s33 offset:252
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s33 offset:4 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s33 offset:8 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2, off, s33 offset:12 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3, off, s33 offset:16 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4, off, s33 offset:20 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5, off, s33 offset:24 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6, off, s33 offset:28 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7, off, s33 offset:32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8, off, s33 offset:36 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9, off, s33 offset:40 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10, off, s33 offset:44 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11, off, s33 offset:48 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12, off, s33 offset:52 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13, off, s33 offset:56 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14, off, s33 offset:60 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15, off, s33 offset:64 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16, off, s33 offset:68 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17, off, s33 offset:72 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18, off, s33 offset:76 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19, off, s33 offset:80 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20, off, s33 offset:84 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21, off, s33 offset:88 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22, off, s33 offset:92 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23, off, s33 offset:96 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24, off, s33 offset:100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25, off, s33 offset:104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26, off, s33 offset:108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27, off, s33 offset:112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28, off, s33 offset:116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29, off, s33 offset:120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30, off, s33 offset:124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31, off, s33 offset:128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32, off, s33 offset:132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33, off, s33 offset:136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34, off, s33 offset:140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35, off, s33 offset:144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36, off, s33 offset:148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37, off, s33 offset:152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38, off, s33 offset:156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39, off, s33 offset:160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48, off, s33 offset:164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49, off, s33 offset:168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50, off, s33 offset:172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51, off, s33 offset:176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52, off, s33 offset:180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53, off, s33 offset:184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54, off, s33 offset:188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55, off, s33 offset:192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64, off, s33 offset:196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65, off, s33 offset:200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66, off, s33 offset:204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67, off, s33 offset:208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68, off, s33 offset:212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69, off, s33 offset:216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70, off, s33 offset:220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71, off, s33 offset:224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80, off, s33 offset:228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81, off, s33 offset:232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82, off, s33 offset:236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83, off, s33 offset:240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84, off, s33 offset:244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85, off, s33 offset:248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86, off, s33 offset:252 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87, off, s33 offset:256
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96, off, s33 offset:260
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97, off, s33 offset:264
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98, off, s33 offset:268
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99, off, s33 offset:272
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100, off, s33 offset:276
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101, off, s33 offset:280
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102, off, s33 offset:284
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103, off, s33 offset:288
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112, off, s33 offset:292
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113, off, s33 offset:296
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114, off, s33 offset:300
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115, off, s33 offset:304
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116, off, s33 offset:308
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117, off, s33 offset:312
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118, off, s33 offset:316
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119, off, s33 offset:320
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128, off, s33 offset:324
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129, off, s33 offset:328
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130, off, s33 offset:332
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131, off, s33 offset:336
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132, off, s33 offset:340
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133, off, s33 offset:344
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134, off, s33 offset:348
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135, off, s33 offset:352
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144, off, s33 offset:356
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145, off, s33 offset:360
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146, off, s33 offset:364
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147, off, s33 offset:368
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148, off, s33 offset:372
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149, off, s33 offset:376
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150, off, s33 offset:380
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151, off, s33 offset:384
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160, off, s33 offset:388
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161, off, s33 offset:392
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162, off, s33 offset:396
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163, off, s33 offset:400
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164, off, s33 offset:404
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165, off, s33 offset:408
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166, off, s33 offset:412
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167, off, s33 offset:416
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176, off, s33 offset:420
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177, off, s33 offset:424
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178, off, s33 offset:428
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179, off, s33 offset:432
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180, off, s33 offset:436
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181, off, s33 offset:440
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182, off, s33 offset:444
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183, off, s33 offset:448
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192, off, s33 offset:452
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193, off, s33 offset:456
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194, off, s33 offset:460
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195, off, s33 offset:464
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196, off, s33 offset:468
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197, off, s33 offset:472
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198, off, s33 offset:476
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199, off, s33 offset:480
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208, off, s33 offset:484
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209, off, s33 offset:488
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210, off, s33 offset:492
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211, off, s33 offset:496
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212, off, s33 offset:500
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213, off, s33 offset:504
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87, off, s33 offset:256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96, off, s33 offset:260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97, off, s33 offset:264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98, off, s33 offset:268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99, off, s33 offset:272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100, off, s33 offset:276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101, off, s33 offset:280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102, off, s33 offset:284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103, off, s33 offset:288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112, off, s33 offset:292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113, off, s33 offset:296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114, off, s33 offset:300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115, off, s33 offset:304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116, off, s33 offset:308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117, off, s33 offset:312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118, off, s33 offset:316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119, off, s33 offset:320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128, off, s33 offset:324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129, off, s33 offset:328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130, off, s33 offset:332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131, off, s33 offset:336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132, off, s33 offset:340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133, off, s33 offset:344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134, off, s33 offset:348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135, off, s33 offset:352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144, off, s33 offset:356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145, off, s33 offset:360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146, off, s33 offset:364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147, off, s33 offset:368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148, off, s33 offset:372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149, off, s33 offset:376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150, off, s33 offset:380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151, off, s33 offset:384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160, off, s33 offset:388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161, off, s33 offset:392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162, off, s33 offset:396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163, off, s33 offset:400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164, off, s33 offset:404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165, off, s33 offset:408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166, off, s33 offset:412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167, off, s33 offset:416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176, off, s33 offset:420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177, off, s33 offset:424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178, off, s33 offset:428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179, off, s33 offset:432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180, off, s33 offset:436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181, off, s33 offset:440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182, off, s33 offset:444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183, off, s33 offset:448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192, off, s33 offset:452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193, off, s33 offset:456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194, off, s33 offset:460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195, off, s33 offset:464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196, off, s33 offset:468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197, off, s33 offset:472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198, off, s33 offset:476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199, off, s33 offset:480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208, off, s33 offset:484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209, off, s33 offset:488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210, off, s33 offset:492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211, off, s33 offset:496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212, off, s33 offset:500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213, off, s33 offset:504 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214, off, s33 offset:508
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215, off, s33 offset:512
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224, off, s33 offset:516
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225, off, s33 offset:520
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226, off, s33 offset:524
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227, off, s33 offset:528
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228, off, s33 offset:532
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229, off, s33 offset:536
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230, off, s33 offset:540
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231, off, s33 offset:544
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240, off, s33 offset:548
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241, off, s33 offset:552
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242, off, s33 offset:556
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243, off, s33 offset:560
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244, off, s33 offset:564
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245, off, s33 offset:568
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246, off, s33 offset:572
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247, off, s33 offset:576
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214, off, s33 offset:508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215, off, s33 offset:512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224, off, s33 offset:516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225, off, s33 offset:520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226, off, s33 offset:524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227, off, s33 offset:528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228, off, s33 offset:532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229, off, s33 offset:536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230, off, s33 offset:540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231, off, s33 offset:544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240, off, s33 offset:548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241, off, s33 offset:552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242, off, s33 offset:556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243, off, s33 offset:560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244, off, s33 offset:564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245, off, s33 offset:568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246, off, s33 offset:572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247, off, s33 offset:576 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 64 ; msbs: dst=1 src0=0 src1=0 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v256*/, off, s33 offset:580
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v257*/, off, s33 offset:584
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v258*/, off, s33 offset:588
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v259*/, off, s33 offset:592
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v260*/, off, s33 offset:596
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v261*/, off, s33 offset:600
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v262*/, off, s33 offset:604
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v263*/, off, s33 offset:608
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v264*/, off, s33 offset:612
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v265*/, off, s33 offset:616
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v266*/, off, s33 offset:620
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v267*/, off, s33 offset:624
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v268*/, off, s33 offset:628
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v269*/, off, s33 offset:632
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v270*/, off, s33 offset:636
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v271*/, off, s33 offset:640
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v272*/, off, s33 offset:644
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v273*/, off, s33 offset:648
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v274*/, off, s33 offset:652
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v275*/, off, s33 offset:656
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v276*/, off, s33 offset:660
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v277*/, off, s33 offset:664
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v278*/, off, s33 offset:668
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v279*/, off, s33 offset:672
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v280*/, off, s33 offset:676
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v281*/, off, s33 offset:680
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v282*/, off, s33 offset:684
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v283*/, off, s33 offset:688
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v284*/, off, s33 offset:692
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v285*/, off, s33 offset:696
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v286*/, off, s33 offset:700
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v287*/, off, s33 offset:704
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v288*/, off, s33 offset:708
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v289*/, off, s33 offset:712
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v290*/, off, s33 offset:716
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v291*/, off, s33 offset:720
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v292*/, off, s33 offset:724
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v293*/, off, s33 offset:728
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v294*/, off, s33 offset:732
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v295*/, off, s33 offset:736
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v296*/, off, s33 offset:740
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v297*/, off, s33 offset:744
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v298*/, off, s33 offset:748
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v299*/, off, s33 offset:752
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v300*/, off, s33 offset:756
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v256*/, off, s33 offset:580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v257*/, off, s33 offset:584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v258*/, off, s33 offset:588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v259*/, off, s33 offset:592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v260*/, off, s33 offset:596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v261*/, off, s33 offset:600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v262*/, off, s33 offset:604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v263*/, off, s33 offset:608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v264*/, off, s33 offset:612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v265*/, off, s33 offset:616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v266*/, off, s33 offset:620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v267*/, off, s33 offset:624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v268*/, off, s33 offset:628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v269*/, off, s33 offset:632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v270*/, off, s33 offset:636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v271*/, off, s33 offset:640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v272*/, off, s33 offset:644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v273*/, off, s33 offset:648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v274*/, off, s33 offset:652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v275*/, off, s33 offset:656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v276*/, off, s33 offset:660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v277*/, off, s33 offset:664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v278*/, off, s33 offset:668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v279*/, off, s33 offset:672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v280*/, off, s33 offset:676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v281*/, off, s33 offset:680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v282*/, off, s33 offset:684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v283*/, off, s33 offset:688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v284*/, off, s33 offset:692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v285*/, off, s33 offset:696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v286*/, off, s33 offset:700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v287*/, off, s33 offset:704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v288*/, off, s33 offset:708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v289*/, off, s33 offset:712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v290*/, off, s33 offset:716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v291*/, off, s33 offset:720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v292*/, off, s33 offset:724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v293*/, off, s33 offset:728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v294*/, off, s33 offset:732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v295*/, off, s33 offset:736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v296*/, off, s33 offset:740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v297*/, off, s33 offset:744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v298*/, off, s33 offset:748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v299*/, off, s33 offset:752 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v300*/, off, s33 offset:756 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v301*/, off, s33 offset:760
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v302*/, off, s33 offset:764
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v303*/, off, s33 offset:768
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v304*/, off, s33 offset:772
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v305*/, off, s33 offset:776
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v306*/, off, s33 offset:780
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v307*/, off, s33 offset:784
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v308*/, off, s33 offset:788
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v309*/, off, s33 offset:792
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v310*/, off, s33 offset:796
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v311*/, off, s33 offset:800
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v312*/, off, s33 offset:804
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v313*/, off, s33 offset:808
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v314*/, off, s33 offset:812
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v315*/, off, s33 offset:816
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v316*/, off, s33 offset:820
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v317*/, off, s33 offset:824
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v318*/, off, s33 offset:828
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v319*/, off, s33 offset:832
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v320*/, off, s33 offset:836
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v321*/, off, s33 offset:840
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v322*/, off, s33 offset:844
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v323*/, off, s33 offset:848
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v324*/, off, s33 offset:852
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v325*/, off, s33 offset:856
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v326*/, off, s33 offset:860
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v327*/, off, s33 offset:864
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v328*/, off, s33 offset:868
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v329*/, off, s33 offset:872
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v330*/, off, s33 offset:876
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v331*/, off, s33 offset:880
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v332*/, off, s33 offset:884
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v333*/, off, s33 offset:888
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v334*/, off, s33 offset:892
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v335*/, off, s33 offset:896
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v336*/, off, s33 offset:900
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v337*/, off, s33 offset:904
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v338*/, off, s33 offset:908
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v339*/, off, s33 offset:912
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v340*/, off, s33 offset:916
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v341*/, off, s33 offset:920
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v342*/, off, s33 offset:924
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v343*/, off, s33 offset:928
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v344*/, off, s33 offset:932
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v345*/, off, s33 offset:936
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v346*/, off, s33 offset:940
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v347*/, off, s33 offset:944
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v348*/, off, s33 offset:948
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v349*/, off, s33 offset:952
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v350*/, off, s33 offset:956
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v351*/, off, s33 offset:960
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v352*/, off, s33 offset:964
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v353*/, off, s33 offset:968
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v354*/, off, s33 offset:972
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v355*/, off, s33 offset:976
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v356*/, off, s33 offset:980
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v357*/, off, s33 offset:984
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v358*/, off, s33 offset:988
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v359*/, off, s33 offset:992
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v360*/, off, s33 offset:996
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v361*/, off, s33 offset:1000
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v362*/, off, s33 offset:1004
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v363*/, off, s33 offset:1008
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v301*/, off, s33 offset:760 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v302*/, off, s33 offset:764 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v303*/, off, s33 offset:768 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v304*/, off, s33 offset:772 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v305*/, off, s33 offset:776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v306*/, off, s33 offset:780 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v307*/, off, s33 offset:784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v308*/, off, s33 offset:788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v309*/, off, s33 offset:792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v310*/, off, s33 offset:796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v311*/, off, s33 offset:800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v312*/, off, s33 offset:804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v313*/, off, s33 offset:808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v314*/, off, s33 offset:812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v315*/, off, s33 offset:816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v316*/, off, s33 offset:820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v317*/, off, s33 offset:824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v318*/, off, s33 offset:828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v319*/, off, s33 offset:832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v320*/, off, s33 offset:836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v321*/, off, s33 offset:840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v322*/, off, s33 offset:844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v323*/, off, s33 offset:848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v324*/, off, s33 offset:852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v325*/, off, s33 offset:856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v326*/, off, s33 offset:860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v327*/, off, s33 offset:864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v328*/, off, s33 offset:868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v329*/, off, s33 offset:872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v330*/, off, s33 offset:876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v331*/, off, s33 offset:880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v332*/, off, s33 offset:884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v333*/, off, s33 offset:888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v334*/, off, s33 offset:892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v335*/, off, s33 offset:896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v336*/, off, s33 offset:900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v337*/, off, s33 offset:904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v338*/, off, s33 offset:908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v339*/, off, s33 offset:912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v340*/, off, s33 offset:916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v341*/, off, s33 offset:920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v342*/, off, s33 offset:924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v343*/, off, s33 offset:928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v344*/, off, s33 offset:932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v345*/, off, s33 offset:936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v346*/, off, s33 offset:940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v347*/, off, s33 offset:944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v348*/, off, s33 offset:948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v349*/, off, s33 offset:952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v350*/, off, s33 offset:956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v351*/, off, s33 offset:960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v352*/, off, s33 offset:964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v353*/, off, s33 offset:968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v354*/, off, s33 offset:972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v355*/, off, s33 offset:976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v356*/, off, s33 offset:980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v357*/, off, s33 offset:984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v358*/, off, s33 offset:988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v359*/, off, s33 offset:992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v360*/, off, s33 offset:996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v361*/, off, s33 offset:1000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v362*/, off, s33 offset:1004 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v363*/, off, s33 offset:1008 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v364*/, off, s33 offset:1012
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v365*/, off, s33 offset:1016
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v366*/, off, s33 offset:1020
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v367*/, off, s33 offset:1024
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v368*/, off, s33 offset:1028
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v369*/, off, s33 offset:1032
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v370*/, off, s33 offset:1036
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v371*/, off, s33 offset:1040
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v372*/, off, s33 offset:1044
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v373*/, off, s33 offset:1048
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v374*/, off, s33 offset:1052
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v375*/, off, s33 offset:1056
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v376*/, off, s33 offset:1060
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v377*/, off, s33 offset:1064
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v378*/, off, s33 offset:1068
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v379*/, off, s33 offset:1072
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v380*/, off, s33 offset:1076
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v381*/, off, s33 offset:1080
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v382*/, off, s33 offset:1084
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v383*/, off, s33 offset:1088
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v384*/, off, s33 offset:1092
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v385*/, off, s33 offset:1096
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v386*/, off, s33 offset:1100
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v387*/, off, s33 offset:1104
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v388*/, off, s33 offset:1108
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v389*/, off, s33 offset:1112
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v390*/, off, s33 offset:1116
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v391*/, off, s33 offset:1120
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v392*/, off, s33 offset:1124
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v393*/, off, s33 offset:1128
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v394*/, off, s33 offset:1132
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v395*/, off, s33 offset:1136
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v396*/, off, s33 offset:1140
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v397*/, off, s33 offset:1144
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v398*/, off, s33 offset:1148
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v399*/, off, s33 offset:1152
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v400*/, off, s33 offset:1156
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v401*/, off, s33 offset:1160
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v402*/, off, s33 offset:1164
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v403*/, off, s33 offset:1168
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v404*/, off, s33 offset:1172
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v405*/, off, s33 offset:1176
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v406*/, off, s33 offset:1180
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v407*/, off, s33 offset:1184
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v408*/, off, s33 offset:1188
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v409*/, off, s33 offset:1192
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v410*/, off, s33 offset:1196
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v411*/, off, s33 offset:1200
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v412*/, off, s33 offset:1204
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v413*/, off, s33 offset:1208
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v414*/, off, s33 offset:1212
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v415*/, off, s33 offset:1216
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v416*/, off, s33 offset:1220
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v417*/, off, s33 offset:1224
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v418*/, off, s33 offset:1228
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v419*/, off, s33 offset:1232
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v420*/, off, s33 offset:1236
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v421*/, off, s33 offset:1240
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v422*/, off, s33 offset:1244
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v423*/, off, s33 offset:1248
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v424*/, off, s33 offset:1252
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v425*/, off, s33 offset:1256
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v426*/, off, s33 offset:1260
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v364*/, off, s33 offset:1012 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v365*/, off, s33 offset:1016 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v366*/, off, s33 offset:1020 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v367*/, off, s33 offset:1024 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v368*/, off, s33 offset:1028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v369*/, off, s33 offset:1032 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v370*/, off, s33 offset:1036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v371*/, off, s33 offset:1040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v372*/, off, s33 offset:1044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v373*/, off, s33 offset:1048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v374*/, off, s33 offset:1052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v375*/, off, s33 offset:1056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v376*/, off, s33 offset:1060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v377*/, off, s33 offset:1064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v378*/, off, s33 offset:1068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v379*/, off, s33 offset:1072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v380*/, off, s33 offset:1076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v381*/, off, s33 offset:1080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v382*/, off, s33 offset:1084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v383*/, off, s33 offset:1088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v384*/, off, s33 offset:1092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v385*/, off, s33 offset:1096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v386*/, off, s33 offset:1100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v387*/, off, s33 offset:1104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v388*/, off, s33 offset:1108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v389*/, off, s33 offset:1112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v390*/, off, s33 offset:1116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v391*/, off, s33 offset:1120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v392*/, off, s33 offset:1124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v393*/, off, s33 offset:1128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v394*/, off, s33 offset:1132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v395*/, off, s33 offset:1136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v396*/, off, s33 offset:1140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v397*/, off, s33 offset:1144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v398*/, off, s33 offset:1148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v399*/, off, s33 offset:1152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v400*/, off, s33 offset:1156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v401*/, off, s33 offset:1160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v402*/, off, s33 offset:1164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v403*/, off, s33 offset:1168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v404*/, off, s33 offset:1172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v405*/, off, s33 offset:1176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v406*/, off, s33 offset:1180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v407*/, off, s33 offset:1184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v408*/, off, s33 offset:1188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v409*/, off, s33 offset:1192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v410*/, off, s33 offset:1196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v411*/, off, s33 offset:1200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v412*/, off, s33 offset:1204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v413*/, off, s33 offset:1208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v414*/, off, s33 offset:1212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v415*/, off, s33 offset:1216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v416*/, off, s33 offset:1220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v417*/, off, s33 offset:1224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v418*/, off, s33 offset:1228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v419*/, off, s33 offset:1232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v420*/, off, s33 offset:1236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v421*/, off, s33 offset:1240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v422*/, off, s33 offset:1244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v423*/, off, s33 offset:1248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v424*/, off, s33 offset:1252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v425*/, off, s33 offset:1256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v426*/, off, s33 offset:1260 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v427*/, off, s33 offset:1264
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v428*/, off, s33 offset:1268
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v429*/, off, s33 offset:1272
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v430*/, off, s33 offset:1276
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v431*/, off, s33 offset:1280
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v432*/, off, s33 offset:1284
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v433*/, off, s33 offset:1288
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v434*/, off, s33 offset:1292
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v435*/, off, s33 offset:1296
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v436*/, off, s33 offset:1300
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v437*/, off, s33 offset:1304
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v438*/, off, s33 offset:1308
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v439*/, off, s33 offset:1312
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v440*/, off, s33 offset:1316
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v441*/, off, s33 offset:1320
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v442*/, off, s33 offset:1324
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v443*/, off, s33 offset:1328
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v444*/, off, s33 offset:1332
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v445*/, off, s33 offset:1336
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v446*/, off, s33 offset:1340
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v447*/, off, s33 offset:1344
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v448*/, off, s33 offset:1348
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v449*/, off, s33 offset:1352
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v450*/, off, s33 offset:1356
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v451*/, off, s33 offset:1360
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v452*/, off, s33 offset:1364
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v453*/, off, s33 offset:1368
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v454*/, off, s33 offset:1372
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v455*/, off, s33 offset:1376
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v456*/, off, s33 offset:1380
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v457*/, off, s33 offset:1384
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v458*/, off, s33 offset:1388
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v459*/, off, s33 offset:1392
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v460*/, off, s33 offset:1396
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v461*/, off, s33 offset:1400
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v462*/, off, s33 offset:1404
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v463*/, off, s33 offset:1408
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v464*/, off, s33 offset:1412
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v465*/, off, s33 offset:1416
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v466*/, off, s33 offset:1420
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v467*/, off, s33 offset:1424
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v468*/, off, s33 offset:1428
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v469*/, off, s33 offset:1432
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v470*/, off, s33 offset:1436
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v471*/, off, s33 offset:1440
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v472*/, off, s33 offset:1444
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v473*/, off, s33 offset:1448
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v474*/, off, s33 offset:1452
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v475*/, off, s33 offset:1456
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v476*/, off, s33 offset:1460
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v477*/, off, s33 offset:1464
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v478*/, off, s33 offset:1468
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v479*/, off, s33 offset:1472
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v480*/, off, s33 offset:1476
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v481*/, off, s33 offset:1480
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v482*/, off, s33 offset:1484
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v483*/, off, s33 offset:1488
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v484*/, off, s33 offset:1492
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v485*/, off, s33 offset:1496
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v486*/, off, s33 offset:1500
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v487*/, off, s33 offset:1504
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v488*/, off, s33 offset:1508
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v489*/, off, s33 offset:1512
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v427*/, off, s33 offset:1264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v428*/, off, s33 offset:1268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v429*/, off, s33 offset:1272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v430*/, off, s33 offset:1276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v431*/, off, s33 offset:1280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v432*/, off, s33 offset:1284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v433*/, off, s33 offset:1288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v434*/, off, s33 offset:1292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v435*/, off, s33 offset:1296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v436*/, off, s33 offset:1300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v437*/, off, s33 offset:1304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v438*/, off, s33 offset:1308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v439*/, off, s33 offset:1312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v440*/, off, s33 offset:1316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v441*/, off, s33 offset:1320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v442*/, off, s33 offset:1324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v443*/, off, s33 offset:1328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v444*/, off, s33 offset:1332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v445*/, off, s33 offset:1336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v446*/, off, s33 offset:1340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v447*/, off, s33 offset:1344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v448*/, off, s33 offset:1348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v449*/, off, s33 offset:1352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v450*/, off, s33 offset:1356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v451*/, off, s33 offset:1360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v452*/, off, s33 offset:1364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v453*/, off, s33 offset:1368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v454*/, off, s33 offset:1372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v455*/, off, s33 offset:1376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v456*/, off, s33 offset:1380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v457*/, off, s33 offset:1384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v458*/, off, s33 offset:1388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v459*/, off, s33 offset:1392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v460*/, off, s33 offset:1396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v461*/, off, s33 offset:1400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v462*/, off, s33 offset:1404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v463*/, off, s33 offset:1408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v464*/, off, s33 offset:1412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v465*/, off, s33 offset:1416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v466*/, off, s33 offset:1420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v467*/, off, s33 offset:1424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v468*/, off, s33 offset:1428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v469*/, off, s33 offset:1432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v470*/, off, s33 offset:1436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v471*/, off, s33 offset:1440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v472*/, off, s33 offset:1444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v473*/, off, s33 offset:1448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v474*/, off, s33 offset:1452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v475*/, off, s33 offset:1456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v476*/, off, s33 offset:1460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v477*/, off, s33 offset:1464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v478*/, off, s33 offset:1468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v479*/, off, s33 offset:1472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v480*/, off, s33 offset:1476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v481*/, off, s33 offset:1480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v482*/, off, s33 offset:1484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v483*/, off, s33 offset:1488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v484*/, off, s33 offset:1492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v485*/, off, s33 offset:1496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v486*/, off, s33 offset:1500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v487*/, off, s33 offset:1504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v488*/, off, s33 offset:1508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v489*/, off, s33 offset:1512 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v490*/, off, s33 offset:1516
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v491*/, off, s33 offset:1520
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v492*/, off, s33 offset:1524
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v493*/, off, s33 offset:1528
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v494*/, off, s33 offset:1532
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v495*/, off, s33 offset:1536
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v496*/, off, s33 offset:1540
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v497*/, off, s33 offset:1544
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v498*/, off, s33 offset:1548
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v499*/, off, s33 offset:1552
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v500*/, off, s33 offset:1556
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v501*/, off, s33 offset:1560
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v502*/, off, s33 offset:1564
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v503*/, off, s33 offset:1568
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v504*/, off, s33 offset:1572
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v505*/, off, s33 offset:1576
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v506*/, off, s33 offset:1580
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v507*/, off, s33 offset:1584
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v508*/, off, s33 offset:1588
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v509*/, off, s33 offset:1592
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v510*/, off, s33 offset:1596
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v511*/, off, s33 offset:1600
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v490*/, off, s33 offset:1516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v491*/, off, s33 offset:1520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v492*/, off, s33 offset:1524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v493*/, off, s33 offset:1528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v494*/, off, s33 offset:1532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v495*/, off, s33 offset:1536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v496*/, off, s33 offset:1540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v497*/, off, s33 offset:1544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v498*/, off, s33 offset:1548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v499*/, off, s33 offset:1552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v500*/, off, s33 offset:1556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v501*/, off, s33 offset:1560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v502*/, off, s33 offset:1564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v503*/, off, s33 offset:1568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v504*/, off, s33 offset:1572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v505*/, off, s33 offset:1576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v506*/, off, s33 offset:1580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v507*/, off, s33 offset:1584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v508*/, off, s33 offset:1588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v509*/, off, s33 offset:1592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v510*/, off, s33 offset:1596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v511*/, off, s33 offset:1600 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x4080 ; msbs: dst=2 src0=0 src1=0 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v512*/, off, s33 offset:1604
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v513*/, off, s33 offset:1608
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v514*/, off, s33 offset:1612
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v515*/, off, s33 offset:1616
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v516*/, off, s33 offset:1620
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v517*/, off, s33 offset:1624
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v518*/, off, s33 offset:1628
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v519*/, off, s33 offset:1632
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v520*/, off, s33 offset:1636
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v521*/, off, s33 offset:1640
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v522*/, off, s33 offset:1644
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v523*/, off, s33 offset:1648
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v524*/, off, s33 offset:1652
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v525*/, off, s33 offset:1656
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v526*/, off, s33 offset:1660
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v527*/, off, s33 offset:1664
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v528*/, off, s33 offset:1668
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v529*/, off, s33 offset:1672
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v530*/, off, s33 offset:1676
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v531*/, off, s33 offset:1680
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v532*/, off, s33 offset:1684
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v533*/, off, s33 offset:1688
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v534*/, off, s33 offset:1692
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v535*/, off, s33 offset:1696
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v536*/, off, s33 offset:1700
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v537*/, off, s33 offset:1704
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v538*/, off, s33 offset:1708
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v539*/, off, s33 offset:1712
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v540*/, off, s33 offset:1716
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v541*/, off, s33 offset:1720
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v542*/, off, s33 offset:1724
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v543*/, off, s33 offset:1728
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v544*/, off, s33 offset:1732
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v545*/, off, s33 offset:1736
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v546*/, off, s33 offset:1740
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v547*/, off, s33 offset:1744
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v548*/, off, s33 offset:1748
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v549*/, off, s33 offset:1752
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v550*/, off, s33 offset:1756
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v551*/, off, s33 offset:1760
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v552*/, off, s33 offset:1764
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v512*/, off, s33 offset:1604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v513*/, off, s33 offset:1608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v514*/, off, s33 offset:1612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v515*/, off, s33 offset:1616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v516*/, off, s33 offset:1620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v517*/, off, s33 offset:1624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v518*/, off, s33 offset:1628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v519*/, off, s33 offset:1632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v520*/, off, s33 offset:1636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v521*/, off, s33 offset:1640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v522*/, off, s33 offset:1644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v523*/, off, s33 offset:1648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v524*/, off, s33 offset:1652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v525*/, off, s33 offset:1656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v526*/, off, s33 offset:1660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v527*/, off, s33 offset:1664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v528*/, off, s33 offset:1668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v529*/, off, s33 offset:1672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v530*/, off, s33 offset:1676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v531*/, off, s33 offset:1680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v532*/, off, s33 offset:1684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v533*/, off, s33 offset:1688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v534*/, off, s33 offset:1692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v535*/, off, s33 offset:1696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v536*/, off, s33 offset:1700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v537*/, off, s33 offset:1704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v538*/, off, s33 offset:1708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v539*/, off, s33 offset:1712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v540*/, off, s33 offset:1716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v541*/, off, s33 offset:1720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v542*/, off, s33 offset:1724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v543*/, off, s33 offset:1728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v544*/, off, s33 offset:1732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v545*/, off, s33 offset:1736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v546*/, off, s33 offset:1740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v547*/, off, s33 offset:1744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v548*/, off, s33 offset:1748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v549*/, off, s33 offset:1752 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v550*/, off, s33 offset:1756 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v551*/, off, s33 offset:1760 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v552*/, off, s33 offset:1764 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v553*/, off, s33 offset:1768
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v554*/, off, s33 offset:1772
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v555*/, off, s33 offset:1776
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v556*/, off, s33 offset:1780
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v557*/, off, s33 offset:1784
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v558*/, off, s33 offset:1788
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v559*/, off, s33 offset:1792
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v560*/, off, s33 offset:1796
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v561*/, off, s33 offset:1800
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v562*/, off, s33 offset:1804
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v563*/, off, s33 offset:1808
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v564*/, off, s33 offset:1812
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v565*/, off, s33 offset:1816
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v566*/, off, s33 offset:1820
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v567*/, off, s33 offset:1824
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v568*/, off, s33 offset:1828
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v569*/, off, s33 offset:1832
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v570*/, off, s33 offset:1836
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v571*/, off, s33 offset:1840
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v572*/, off, s33 offset:1844
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v573*/, off, s33 offset:1848
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v574*/, off, s33 offset:1852
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v575*/, off, s33 offset:1856
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v576*/, off, s33 offset:1860
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v577*/, off, s33 offset:1864
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v578*/, off, s33 offset:1868
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v579*/, off, s33 offset:1872
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v580*/, off, s33 offset:1876
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v581*/, off, s33 offset:1880
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v582*/, off, s33 offset:1884
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v583*/, off, s33 offset:1888
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v584*/, off, s33 offset:1892
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v585*/, off, s33 offset:1896
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v586*/, off, s33 offset:1900
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v587*/, off, s33 offset:1904
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v588*/, off, s33 offset:1908
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v589*/, off, s33 offset:1912
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v590*/, off, s33 offset:1916
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v591*/, off, s33 offset:1920
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v592*/, off, s33 offset:1924
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v593*/, off, s33 offset:1928
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v594*/, off, s33 offset:1932
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v595*/, off, s33 offset:1936
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v596*/, off, s33 offset:1940
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v597*/, off, s33 offset:1944
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v598*/, off, s33 offset:1948
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v599*/, off, s33 offset:1952
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v600*/, off, s33 offset:1956
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v601*/, off, s33 offset:1960
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v602*/, off, s33 offset:1964
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v603*/, off, s33 offset:1968
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v604*/, off, s33 offset:1972
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v605*/, off, s33 offset:1976
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v606*/, off, s33 offset:1980
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v607*/, off, s33 offset:1984
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v608*/, off, s33 offset:1988
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v609*/, off, s33 offset:1992
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v610*/, off, s33 offset:1996
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v611*/, off, s33 offset:2000
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v612*/, off, s33 offset:2004
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v613*/, off, s33 offset:2008
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v614*/, off, s33 offset:2012
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v615*/, off, s33 offset:2016
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v553*/, off, s33 offset:1768 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v554*/, off, s33 offset:1772 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v555*/, off, s33 offset:1776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v556*/, off, s33 offset:1780 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v557*/, off, s33 offset:1784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v558*/, off, s33 offset:1788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v559*/, off, s33 offset:1792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v560*/, off, s33 offset:1796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v561*/, off, s33 offset:1800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v562*/, off, s33 offset:1804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v563*/, off, s33 offset:1808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v564*/, off, s33 offset:1812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v565*/, off, s33 offset:1816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v566*/, off, s33 offset:1820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v567*/, off, s33 offset:1824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v568*/, off, s33 offset:1828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v569*/, off, s33 offset:1832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v570*/, off, s33 offset:1836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v571*/, off, s33 offset:1840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v572*/, off, s33 offset:1844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v573*/, off, s33 offset:1848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v574*/, off, s33 offset:1852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v575*/, off, s33 offset:1856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v576*/, off, s33 offset:1860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v577*/, off, s33 offset:1864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v578*/, off, s33 offset:1868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v579*/, off, s33 offset:1872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v580*/, off, s33 offset:1876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v581*/, off, s33 offset:1880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v582*/, off, s33 offset:1884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v583*/, off, s33 offset:1888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v584*/, off, s33 offset:1892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v585*/, off, s33 offset:1896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v586*/, off, s33 offset:1900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v587*/, off, s33 offset:1904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v588*/, off, s33 offset:1908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v589*/, off, s33 offset:1912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v590*/, off, s33 offset:1916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v591*/, off, s33 offset:1920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v592*/, off, s33 offset:1924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v593*/, off, s33 offset:1928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v594*/, off, s33 offset:1932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v595*/, off, s33 offset:1936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v596*/, off, s33 offset:1940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v597*/, off, s33 offset:1944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v598*/, off, s33 offset:1948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v599*/, off, s33 offset:1952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v600*/, off, s33 offset:1956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v601*/, off, s33 offset:1960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v602*/, off, s33 offset:1964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v603*/, off, s33 offset:1968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v604*/, off, s33 offset:1972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v605*/, off, s33 offset:1976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v606*/, off, s33 offset:1980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v607*/, off, s33 offset:1984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v608*/, off, s33 offset:1988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v609*/, off, s33 offset:1992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v610*/, off, s33 offset:1996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v611*/, off, s33 offset:2000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v612*/, off, s33 offset:2004 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v613*/, off, s33 offset:2008 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v614*/, off, s33 offset:2012 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v615*/, off, s33 offset:2016 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v616*/, off, s33 offset:2020
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v617*/, off, s33 offset:2024
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v618*/, off, s33 offset:2028
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v619*/, off, s33 offset:2032
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v620*/, off, s33 offset:2036
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v621*/, off, s33 offset:2040
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v622*/, off, s33 offset:2044
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v623*/, off, s33 offset:2048
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v624*/, off, s33 offset:2052
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v625*/, off, s33 offset:2056
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v626*/, off, s33 offset:2060
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v627*/, off, s33 offset:2064
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v628*/, off, s33 offset:2068
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v629*/, off, s33 offset:2072
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v630*/, off, s33 offset:2076
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v631*/, off, s33 offset:2080
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v632*/, off, s33 offset:2084
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v633*/, off, s33 offset:2088
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v634*/, off, s33 offset:2092
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v635*/, off, s33 offset:2096
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v636*/, off, s33 offset:2100
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v637*/, off, s33 offset:2104
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v638*/, off, s33 offset:2108
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v639*/, off, s33 offset:2112
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v640*/, off, s33 offset:2116
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v641*/, off, s33 offset:2120
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v642*/, off, s33 offset:2124
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v643*/, off, s33 offset:2128
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v644*/, off, s33 offset:2132
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v645*/, off, s33 offset:2136
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v646*/, off, s33 offset:2140
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v647*/, off, s33 offset:2144
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v648*/, off, s33 offset:2148
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v649*/, off, s33 offset:2152
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v650*/, off, s33 offset:2156
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v651*/, off, s33 offset:2160
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v652*/, off, s33 offset:2164
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v653*/, off, s33 offset:2168
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v654*/, off, s33 offset:2172
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v655*/, off, s33 offset:2176
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v656*/, off, s33 offset:2180
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v657*/, off, s33 offset:2184
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v658*/, off, s33 offset:2188
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v659*/, off, s33 offset:2192
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v660*/, off, s33 offset:2196
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v661*/, off, s33 offset:2200
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v662*/, off, s33 offset:2204
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v663*/, off, s33 offset:2208
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v664*/, off, s33 offset:2212
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v665*/, off, s33 offset:2216
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v666*/, off, s33 offset:2220
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v667*/, off, s33 offset:2224
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v668*/, off, s33 offset:2228
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v669*/, off, s33 offset:2232
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v670*/, off, s33 offset:2236
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v671*/, off, s33 offset:2240
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v672*/, off, s33 offset:2244
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v673*/, off, s33 offset:2248
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v674*/, off, s33 offset:2252
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v675*/, off, s33 offset:2256
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v676*/, off, s33 offset:2260
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v677*/, off, s33 offset:2264
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v678*/, off, s33 offset:2268
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v616*/, off, s33 offset:2020 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v617*/, off, s33 offset:2024 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v618*/, off, s33 offset:2028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v619*/, off, s33 offset:2032 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v620*/, off, s33 offset:2036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v621*/, off, s33 offset:2040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v622*/, off, s33 offset:2044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v623*/, off, s33 offset:2048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v624*/, off, s33 offset:2052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v625*/, off, s33 offset:2056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v626*/, off, s33 offset:2060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v627*/, off, s33 offset:2064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v628*/, off, s33 offset:2068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v629*/, off, s33 offset:2072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v630*/, off, s33 offset:2076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v631*/, off, s33 offset:2080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v632*/, off, s33 offset:2084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v633*/, off, s33 offset:2088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v634*/, off, s33 offset:2092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v635*/, off, s33 offset:2096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v636*/, off, s33 offset:2100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v637*/, off, s33 offset:2104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v638*/, off, s33 offset:2108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v639*/, off, s33 offset:2112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v640*/, off, s33 offset:2116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v641*/, off, s33 offset:2120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v642*/, off, s33 offset:2124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v643*/, off, s33 offset:2128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v644*/, off, s33 offset:2132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v645*/, off, s33 offset:2136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v646*/, off, s33 offset:2140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v647*/, off, s33 offset:2144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v648*/, off, s33 offset:2148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v649*/, off, s33 offset:2152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v650*/, off, s33 offset:2156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v651*/, off, s33 offset:2160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v652*/, off, s33 offset:2164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v653*/, off, s33 offset:2168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v654*/, off, s33 offset:2172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v655*/, off, s33 offset:2176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v656*/, off, s33 offset:2180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v657*/, off, s33 offset:2184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v658*/, off, s33 offset:2188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v659*/, off, s33 offset:2192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v660*/, off, s33 offset:2196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v661*/, off, s33 offset:2200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v662*/, off, s33 offset:2204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v663*/, off, s33 offset:2208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v664*/, off, s33 offset:2212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v665*/, off, s33 offset:2216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v666*/, off, s33 offset:2220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v667*/, off, s33 offset:2224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v668*/, off, s33 offset:2228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v669*/, off, s33 offset:2232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v670*/, off, s33 offset:2236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v671*/, off, s33 offset:2240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v672*/, off, s33 offset:2244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v673*/, off, s33 offset:2248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v674*/, off, s33 offset:2252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v675*/, off, s33 offset:2256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v676*/, off, s33 offset:2260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v677*/, off, s33 offset:2264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v678*/, off, s33 offset:2268 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v679*/, off, s33 offset:2272
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v680*/, off, s33 offset:2276
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v681*/, off, s33 offset:2280
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v682*/, off, s33 offset:2284
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v683*/, off, s33 offset:2288
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v684*/, off, s33 offset:2292
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v685*/, off, s33 offset:2296
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v686*/, off, s33 offset:2300
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v687*/, off, s33 offset:2304
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v688*/, off, s33 offset:2308
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v689*/, off, s33 offset:2312
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v690*/, off, s33 offset:2316
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v691*/, off, s33 offset:2320
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v692*/, off, s33 offset:2324
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v693*/, off, s33 offset:2328
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v694*/, off, s33 offset:2332
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v695*/, off, s33 offset:2336
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v696*/, off, s33 offset:2340
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v697*/, off, s33 offset:2344
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v698*/, off, s33 offset:2348
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v699*/, off, s33 offset:2352
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v700*/, off, s33 offset:2356
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v701*/, off, s33 offset:2360
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v702*/, off, s33 offset:2364
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v703*/, off, s33 offset:2368
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v704*/, off, s33 offset:2372
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v705*/, off, s33 offset:2376
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v706*/, off, s33 offset:2380
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v707*/, off, s33 offset:2384
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v708*/, off, s33 offset:2388
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v709*/, off, s33 offset:2392
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v710*/, off, s33 offset:2396
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v711*/, off, s33 offset:2400
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v712*/, off, s33 offset:2404
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v713*/, off, s33 offset:2408
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v714*/, off, s33 offset:2412
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v715*/, off, s33 offset:2416
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v716*/, off, s33 offset:2420
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v717*/, off, s33 offset:2424
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v718*/, off, s33 offset:2428
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v719*/, off, s33 offset:2432
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v720*/, off, s33 offset:2436
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v721*/, off, s33 offset:2440
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v722*/, off, s33 offset:2444
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v723*/, off, s33 offset:2448
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v724*/, off, s33 offset:2452
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v725*/, off, s33 offset:2456
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v726*/, off, s33 offset:2460
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v727*/, off, s33 offset:2464
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v728*/, off, s33 offset:2468
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v729*/, off, s33 offset:2472
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v730*/, off, s33 offset:2476
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v731*/, off, s33 offset:2480
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v732*/, off, s33 offset:2484
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v733*/, off, s33 offset:2488
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v734*/, off, s33 offset:2492
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v735*/, off, s33 offset:2496
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v736*/, off, s33 offset:2500
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v737*/, off, s33 offset:2504
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v738*/, off, s33 offset:2508
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v739*/, off, s33 offset:2512
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v740*/, off, s33 offset:2516
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v741*/, off, s33 offset:2520
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v679*/, off, s33 offset:2272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v680*/, off, s33 offset:2276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v681*/, off, s33 offset:2280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v682*/, off, s33 offset:2284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v683*/, off, s33 offset:2288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v684*/, off, s33 offset:2292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v685*/, off, s33 offset:2296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v686*/, off, s33 offset:2300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v687*/, off, s33 offset:2304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v688*/, off, s33 offset:2308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v689*/, off, s33 offset:2312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v690*/, off, s33 offset:2316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v691*/, off, s33 offset:2320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v692*/, off, s33 offset:2324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v693*/, off, s33 offset:2328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v694*/, off, s33 offset:2332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v695*/, off, s33 offset:2336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v696*/, off, s33 offset:2340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v697*/, off, s33 offset:2344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v698*/, off, s33 offset:2348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v699*/, off, s33 offset:2352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v700*/, off, s33 offset:2356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v701*/, off, s33 offset:2360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v702*/, off, s33 offset:2364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v703*/, off, s33 offset:2368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v704*/, off, s33 offset:2372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v705*/, off, s33 offset:2376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v706*/, off, s33 offset:2380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v707*/, off, s33 offset:2384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v708*/, off, s33 offset:2388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v709*/, off, s33 offset:2392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v710*/, off, s33 offset:2396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v711*/, off, s33 offset:2400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v712*/, off, s33 offset:2404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v713*/, off, s33 offset:2408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v714*/, off, s33 offset:2412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v715*/, off, s33 offset:2416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v716*/, off, s33 offset:2420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v717*/, off, s33 offset:2424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v718*/, off, s33 offset:2428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v719*/, off, s33 offset:2432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v720*/, off, s33 offset:2436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v721*/, off, s33 offset:2440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v722*/, off, s33 offset:2444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v723*/, off, s33 offset:2448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v724*/, off, s33 offset:2452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v725*/, off, s33 offset:2456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v726*/, off, s33 offset:2460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v727*/, off, s33 offset:2464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v728*/, off, s33 offset:2468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v729*/, off, s33 offset:2472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v730*/, off, s33 offset:2476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v731*/, off, s33 offset:2480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v732*/, off, s33 offset:2484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v733*/, off, s33 offset:2488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v734*/, off, s33 offset:2492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v735*/, off, s33 offset:2496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v736*/, off, s33 offset:2500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v737*/, off, s33 offset:2504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v738*/, off, s33 offset:2508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v739*/, off, s33 offset:2512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v740*/, off, s33 offset:2516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v741*/, off, s33 offset:2520 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v742*/, off, s33 offset:2524
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v743*/, off, s33 offset:2528
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v744*/, off, s33 offset:2532
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v745*/, off, s33 offset:2536
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v746*/, off, s33 offset:2540
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v747*/, off, s33 offset:2544
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v748*/, off, s33 offset:2548
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v749*/, off, s33 offset:2552
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v750*/, off, s33 offset:2556
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v751*/, off, s33 offset:2560
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v752*/, off, s33 offset:2564
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v753*/, off, s33 offset:2568
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v754*/, off, s33 offset:2572
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v755*/, off, s33 offset:2576
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v756*/, off, s33 offset:2580
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v757*/, off, s33 offset:2584
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v758*/, off, s33 offset:2588
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v759*/, off, s33 offset:2592
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v760*/, off, s33 offset:2596
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v761*/, off, s33 offset:2600
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v762*/, off, s33 offset:2604
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v763*/, off, s33 offset:2608
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v764*/, off, s33 offset:2612
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v765*/, off, s33 offset:2616
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v766*/, off, s33 offset:2620
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v767*/, off, s33 offset:2624
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v742*/, off, s33 offset:2524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v743*/, off, s33 offset:2528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v744*/, off, s33 offset:2532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v745*/, off, s33 offset:2536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v746*/, off, s33 offset:2540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v747*/, off, s33 offset:2544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v748*/, off, s33 offset:2548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v749*/, off, s33 offset:2552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v750*/, off, s33 offset:2556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v751*/, off, s33 offset:2560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v752*/, off, s33 offset:2564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v753*/, off, s33 offset:2568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v754*/, off, s33 offset:2572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v755*/, off, s33 offset:2576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v756*/, off, s33 offset:2580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v757*/, off, s33 offset:2584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v758*/, off, s33 offset:2588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v759*/, off, s33 offset:2592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v760*/, off, s33 offset:2596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v761*/, off, s33 offset:2600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v762*/, off, s33 offset:2604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v763*/, off, s33 offset:2608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v764*/, off, s33 offset:2612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v765*/, off, s33 offset:2616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v766*/, off, s33 offset:2620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v767*/, off, s33 offset:2624 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x80c0 ; msbs: dst=3 src0=0 src1=0 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v768*/, off, s33 offset:2628
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v769*/, off, s33 offset:2632
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v770*/, off, s33 offset:2636
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v771*/, off, s33 offset:2640
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v772*/, off, s33 offset:2644
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v773*/, off, s33 offset:2648
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v774*/, off, s33 offset:2652
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v775*/, off, s33 offset:2656
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v776*/, off, s33 offset:2660
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v777*/, off, s33 offset:2664
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v778*/, off, s33 offset:2668
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v779*/, off, s33 offset:2672
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v780*/, off, s33 offset:2676
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v781*/, off, s33 offset:2680
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v782*/, off, s33 offset:2684
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v783*/, off, s33 offset:2688
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v784*/, off, s33 offset:2692
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v785*/, off, s33 offset:2696
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v786*/, off, s33 offset:2700
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v787*/, off, s33 offset:2704
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v788*/, off, s33 offset:2708
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v789*/, off, s33 offset:2712
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v790*/, off, s33 offset:2716
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v791*/, off, s33 offset:2720
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v792*/, off, s33 offset:2724
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v793*/, off, s33 offset:2728
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v794*/, off, s33 offset:2732
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v795*/, off, s33 offset:2736
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v796*/, off, s33 offset:2740
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v797*/, off, s33 offset:2744
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v798*/, off, s33 offset:2748
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v799*/, off, s33 offset:2752
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v800*/, off, s33 offset:2756
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v801*/, off, s33 offset:2760
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v802*/, off, s33 offset:2764
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v803*/, off, s33 offset:2768
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v804*/, off, s33 offset:2772
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v768*/, off, s33 offset:2628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v769*/, off, s33 offset:2632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v770*/, off, s33 offset:2636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v771*/, off, s33 offset:2640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v772*/, off, s33 offset:2644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v773*/, off, s33 offset:2648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v774*/, off, s33 offset:2652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v775*/, off, s33 offset:2656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v776*/, off, s33 offset:2660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v777*/, off, s33 offset:2664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v778*/, off, s33 offset:2668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v779*/, off, s33 offset:2672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v780*/, off, s33 offset:2676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v781*/, off, s33 offset:2680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v782*/, off, s33 offset:2684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v783*/, off, s33 offset:2688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v784*/, off, s33 offset:2692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v785*/, off, s33 offset:2696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v786*/, off, s33 offset:2700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v787*/, off, s33 offset:2704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v788*/, off, s33 offset:2708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v789*/, off, s33 offset:2712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v790*/, off, s33 offset:2716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v791*/, off, s33 offset:2720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v792*/, off, s33 offset:2724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v793*/, off, s33 offset:2728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v794*/, off, s33 offset:2732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v795*/, off, s33 offset:2736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v796*/, off, s33 offset:2740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v797*/, off, s33 offset:2744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v798*/, off, s33 offset:2748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v799*/, off, s33 offset:2752 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v800*/, off, s33 offset:2756 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v801*/, off, s33 offset:2760 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v802*/, off, s33 offset:2764 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v803*/, off, s33 offset:2768 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v804*/, off, s33 offset:2772 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v805*/, off, s33 offset:2776
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v806*/, off, s33 offset:2780
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v807*/, off, s33 offset:2784
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v808*/, off, s33 offset:2788
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v809*/, off, s33 offset:2792
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v810*/, off, s33 offset:2796
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v811*/, off, s33 offset:2800
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v812*/, off, s33 offset:2804
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v813*/, off, s33 offset:2808
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v814*/, off, s33 offset:2812
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v815*/, off, s33 offset:2816
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v816*/, off, s33 offset:2820
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v817*/, off, s33 offset:2824
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v818*/, off, s33 offset:2828
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v819*/, off, s33 offset:2832
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v820*/, off, s33 offset:2836
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v821*/, off, s33 offset:2840
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v822*/, off, s33 offset:2844
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v823*/, off, s33 offset:2848
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v824*/, off, s33 offset:2852
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v825*/, off, s33 offset:2856
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v826*/, off, s33 offset:2860
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v827*/, off, s33 offset:2864
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v828*/, off, s33 offset:2868
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v829*/, off, s33 offset:2872
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v830*/, off, s33 offset:2876
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v831*/, off, s33 offset:2880
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v832*/, off, s33 offset:2884
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v833*/, off, s33 offset:2888
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v834*/, off, s33 offset:2892
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v835*/, off, s33 offset:2896
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v836*/, off, s33 offset:2900
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v837*/, off, s33 offset:2904
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v838*/, off, s33 offset:2908
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v839*/, off, s33 offset:2912
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v840*/, off, s33 offset:2916
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v841*/, off, s33 offset:2920
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v842*/, off, s33 offset:2924
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v843*/, off, s33 offset:2928
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v844*/, off, s33 offset:2932
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v845*/, off, s33 offset:2936
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v846*/, off, s33 offset:2940
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v847*/, off, s33 offset:2944
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v848*/, off, s33 offset:2948
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v849*/, off, s33 offset:2952
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v850*/, off, s33 offset:2956
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v851*/, off, s33 offset:2960
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v852*/, off, s33 offset:2964
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v853*/, off, s33 offset:2968
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v854*/, off, s33 offset:2972
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v855*/, off, s33 offset:2976
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v856*/, off, s33 offset:2980
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v857*/, off, s33 offset:2984
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v858*/, off, s33 offset:2988
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v859*/, off, s33 offset:2992
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v860*/, off, s33 offset:2996
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v861*/, off, s33 offset:3000
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v862*/, off, s33 offset:3004
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v863*/, off, s33 offset:3008
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v864*/, off, s33 offset:3012
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v865*/, off, s33 offset:3016
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v866*/, off, s33 offset:3020
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v867*/, off, s33 offset:3024
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v805*/, off, s33 offset:2776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v806*/, off, s33 offset:2780 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v807*/, off, s33 offset:2784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v808*/, off, s33 offset:2788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v809*/, off, s33 offset:2792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v810*/, off, s33 offset:2796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v811*/, off, s33 offset:2800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v812*/, off, s33 offset:2804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v813*/, off, s33 offset:2808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v814*/, off, s33 offset:2812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v815*/, off, s33 offset:2816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v816*/, off, s33 offset:2820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v817*/, off, s33 offset:2824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v818*/, off, s33 offset:2828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v819*/, off, s33 offset:2832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v820*/, off, s33 offset:2836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v821*/, off, s33 offset:2840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v822*/, off, s33 offset:2844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v823*/, off, s33 offset:2848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v824*/, off, s33 offset:2852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v825*/, off, s33 offset:2856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v826*/, off, s33 offset:2860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v827*/, off, s33 offset:2864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v828*/, off, s33 offset:2868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v829*/, off, s33 offset:2872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v830*/, off, s33 offset:2876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v831*/, off, s33 offset:2880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v832*/, off, s33 offset:2884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v833*/, off, s33 offset:2888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v834*/, off, s33 offset:2892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v835*/, off, s33 offset:2896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v836*/, off, s33 offset:2900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v837*/, off, s33 offset:2904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v838*/, off, s33 offset:2908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v839*/, off, s33 offset:2912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v840*/, off, s33 offset:2916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v841*/, off, s33 offset:2920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v842*/, off, s33 offset:2924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v843*/, off, s33 offset:2928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v844*/, off, s33 offset:2932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v845*/, off, s33 offset:2936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v846*/, off, s33 offset:2940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v847*/, off, s33 offset:2944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v848*/, off, s33 offset:2948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v849*/, off, s33 offset:2952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v850*/, off, s33 offset:2956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v851*/, off, s33 offset:2960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v852*/, off, s33 offset:2964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v853*/, off, s33 offset:2968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v854*/, off, s33 offset:2972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v855*/, off, s33 offset:2976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v856*/, off, s33 offset:2980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v857*/, off, s33 offset:2984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v858*/, off, s33 offset:2988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v859*/, off, s33 offset:2992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v860*/, off, s33 offset:2996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v861*/, off, s33 offset:3000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v862*/, off, s33 offset:3004 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v863*/, off, s33 offset:3008 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v864*/, off, s33 offset:3012 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v865*/, off, s33 offset:3016 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v866*/, off, s33 offset:3020 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v867*/, off, s33 offset:3024 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v868*/, off, s33 offset:3028
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v869*/, off, s33 offset:3032
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v870*/, off, s33 offset:3036
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v871*/, off, s33 offset:3040
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v872*/, off, s33 offset:3044
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v873*/, off, s33 offset:3048
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v874*/, off, s33 offset:3052
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v875*/, off, s33 offset:3056
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v876*/, off, s33 offset:3060
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v877*/, off, s33 offset:3064
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v878*/, off, s33 offset:3068
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v879*/, off, s33 offset:3072
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v880*/, off, s33 offset:3076
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v881*/, off, s33 offset:3080
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v882*/, off, s33 offset:3084
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v883*/, off, s33 offset:3088
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v884*/, off, s33 offset:3092
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v885*/, off, s33 offset:3096
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v886*/, off, s33 offset:3100
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v887*/, off, s33 offset:3104
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v888*/, off, s33 offset:3108
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v889*/, off, s33 offset:3112
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v890*/, off, s33 offset:3116
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v891*/, off, s33 offset:3120
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v892*/, off, s33 offset:3124
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v893*/, off, s33 offset:3128
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v894*/, off, s33 offset:3132
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v895*/, off, s33 offset:3136
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v896*/, off, s33 offset:3140
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v897*/, off, s33 offset:3144
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v898*/, off, s33 offset:3148
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v899*/, off, s33 offset:3152
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v900*/, off, s33 offset:3156
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v901*/, off, s33 offset:3160
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v902*/, off, s33 offset:3164
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v903*/, off, s33 offset:3168
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v904*/, off, s33 offset:3172
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v905*/, off, s33 offset:3176
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v906*/, off, s33 offset:3180
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v907*/, off, s33 offset:3184
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v908*/, off, s33 offset:3188
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v909*/, off, s33 offset:3192
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v910*/, off, s33 offset:3196
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v911*/, off, s33 offset:3200
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v912*/, off, s33 offset:3204
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v913*/, off, s33 offset:3208
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v914*/, off, s33 offset:3212
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v915*/, off, s33 offset:3216
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v916*/, off, s33 offset:3220
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v917*/, off, s33 offset:3224
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v918*/, off, s33 offset:3228
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v919*/, off, s33 offset:3232
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v920*/, off, s33 offset:3236
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v921*/, off, s33 offset:3240
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v922*/, off, s33 offset:3244
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v923*/, off, s33 offset:3248
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v924*/, off, s33 offset:3252
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v925*/, off, s33 offset:3256
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v926*/, off, s33 offset:3260
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v927*/, off, s33 offset:3264
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v928*/, off, s33 offset:3268
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v929*/, off, s33 offset:3272
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v930*/, off, s33 offset:3276
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v868*/, off, s33 offset:3028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v869*/, off, s33 offset:3032 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v870*/, off, s33 offset:3036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v871*/, off, s33 offset:3040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v872*/, off, s33 offset:3044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v873*/, off, s33 offset:3048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v874*/, off, s33 offset:3052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v875*/, off, s33 offset:3056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v876*/, off, s33 offset:3060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v877*/, off, s33 offset:3064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v878*/, off, s33 offset:3068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v879*/, off, s33 offset:3072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v880*/, off, s33 offset:3076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v881*/, off, s33 offset:3080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v882*/, off, s33 offset:3084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v883*/, off, s33 offset:3088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v884*/, off, s33 offset:3092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v885*/, off, s33 offset:3096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v886*/, off, s33 offset:3100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v887*/, off, s33 offset:3104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v888*/, off, s33 offset:3108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v889*/, off, s33 offset:3112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v890*/, off, s33 offset:3116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v891*/, off, s33 offset:3120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v892*/, off, s33 offset:3124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v893*/, off, s33 offset:3128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v894*/, off, s33 offset:3132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v895*/, off, s33 offset:3136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v896*/, off, s33 offset:3140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v897*/, off, s33 offset:3144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v898*/, off, s33 offset:3148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v899*/, off, s33 offset:3152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v900*/, off, s33 offset:3156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v901*/, off, s33 offset:3160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v902*/, off, s33 offset:3164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v903*/, off, s33 offset:3168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v904*/, off, s33 offset:3172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v905*/, off, s33 offset:3176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v906*/, off, s33 offset:3180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v907*/, off, s33 offset:3184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v908*/, off, s33 offset:3188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v909*/, off, s33 offset:3192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v910*/, off, s33 offset:3196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v911*/, off, s33 offset:3200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v912*/, off, s33 offset:3204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v913*/, off, s33 offset:3208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v914*/, off, s33 offset:3212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v915*/, off, s33 offset:3216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v916*/, off, s33 offset:3220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v917*/, off, s33 offset:3224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v918*/, off, s33 offset:3228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v919*/, off, s33 offset:3232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v920*/, off, s33 offset:3236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v921*/, off, s33 offset:3240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v922*/, off, s33 offset:3244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v923*/, off, s33 offset:3248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v924*/, off, s33 offset:3252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v925*/, off, s33 offset:3256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v926*/, off, s33 offset:3260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v927*/, off, s33 offset:3264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v928*/, off, s33 offset:3268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v929*/, off, s33 offset:3272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v930*/, off, s33 offset:3276 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v931*/, off, s33 offset:3280
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v932*/, off, s33 offset:3284
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v933*/, off, s33 offset:3288
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v934*/, off, s33 offset:3292
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v935*/, off, s33 offset:3296
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v936*/, off, s33 offset:3300
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v937*/, off, s33 offset:3304
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v938*/, off, s33 offset:3308
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v939*/, off, s33 offset:3312
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v940*/, off, s33 offset:3316
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v941*/, off, s33 offset:3320
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v942*/, off, s33 offset:3324
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v943*/, off, s33 offset:3328
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v944*/, off, s33 offset:3332
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v945*/, off, s33 offset:3336
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v946*/, off, s33 offset:3340
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v947*/, off, s33 offset:3344
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v948*/, off, s33 offset:3348
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v949*/, off, s33 offset:3352
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v950*/, off, s33 offset:3356
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v951*/, off, s33 offset:3360
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v952*/, off, s33 offset:3364
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v953*/, off, s33 offset:3368
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v954*/, off, s33 offset:3372
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v955*/, off, s33 offset:3376
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v956*/, off, s33 offset:3380
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v957*/, off, s33 offset:3384
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v958*/, off, s33 offset:3388
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v959*/, off, s33 offset:3392
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v960*/, off, s33 offset:3396
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v961*/, off, s33 offset:3400
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v962*/, off, s33 offset:3404
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v963*/, off, s33 offset:3408
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v964*/, off, s33 offset:3412
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v965*/, off, s33 offset:3416
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v966*/, off, s33 offset:3420
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v967*/, off, s33 offset:3424
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v968*/, off, s33 offset:3428
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v969*/, off, s33 offset:3432
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v970*/, off, s33 offset:3436
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v971*/, off, s33 offset:3440
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v972*/, off, s33 offset:3444
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v973*/, off, s33 offset:3448
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v974*/, off, s33 offset:3452
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v975*/, off, s33 offset:3456
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v976*/, off, s33 offset:3460
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v977*/, off, s33 offset:3464
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v978*/, off, s33 offset:3468
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v979*/, off, s33 offset:3472
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v980*/, off, s33 offset:3476
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v981*/, off, s33 offset:3480
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v982*/, off, s33 offset:3484
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v983*/, off, s33 offset:3488
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v984*/, off, s33 offset:3492
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v985*/, off, s33 offset:3496
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v986*/, off, s33 offset:3500
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v987*/, off, s33 offset:3504
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v988*/, off, s33 offset:3508
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v989*/, off, s33 offset:3512
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v990*/, off, s33 offset:3516
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v991*/, off, s33 offset:3520
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v992*/, off, s33 offset:3524
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v993*/, off, s33 offset:3528
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v931*/, off, s33 offset:3280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v932*/, off, s33 offset:3284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v933*/, off, s33 offset:3288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v934*/, off, s33 offset:3292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v935*/, off, s33 offset:3296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v936*/, off, s33 offset:3300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v937*/, off, s33 offset:3304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v938*/, off, s33 offset:3308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v939*/, off, s33 offset:3312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v940*/, off, s33 offset:3316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v941*/, off, s33 offset:3320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v942*/, off, s33 offset:3324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v943*/, off, s33 offset:3328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v944*/, off, s33 offset:3332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v945*/, off, s33 offset:3336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v946*/, off, s33 offset:3340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v947*/, off, s33 offset:3344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v948*/, off, s33 offset:3348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v949*/, off, s33 offset:3352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v950*/, off, s33 offset:3356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v951*/, off, s33 offset:3360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v952*/, off, s33 offset:3364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v953*/, off, s33 offset:3368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v954*/, off, s33 offset:3372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v955*/, off, s33 offset:3376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v956*/, off, s33 offset:3380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v957*/, off, s33 offset:3384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v958*/, off, s33 offset:3388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v959*/, off, s33 offset:3392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v960*/, off, s33 offset:3396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v961*/, off, s33 offset:3400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v962*/, off, s33 offset:3404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v963*/, off, s33 offset:3408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v964*/, off, s33 offset:3412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v965*/, off, s33 offset:3416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v966*/, off, s33 offset:3420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v967*/, off, s33 offset:3424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v968*/, off, s33 offset:3428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v969*/, off, s33 offset:3432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v970*/, off, s33 offset:3436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v971*/, off, s33 offset:3440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v972*/, off, s33 offset:3444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v973*/, off, s33 offset:3448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v974*/, off, s33 offset:3452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v975*/, off, s33 offset:3456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v976*/, off, s33 offset:3460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v977*/, off, s33 offset:3464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v978*/, off, s33 offset:3468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v979*/, off, s33 offset:3472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v980*/, off, s33 offset:3476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v981*/, off, s33 offset:3480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v982*/, off, s33 offset:3484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v983*/, off, s33 offset:3488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v984*/, off, s33 offset:3492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v985*/, off, s33 offset:3496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v986*/, off, s33 offset:3500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v987*/, off, s33 offset:3504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v988*/, off, s33 offset:3508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v989*/, off, s33 offset:3512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v990*/, off, s33 offset:3516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v991*/, off, s33 offset:3520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v992*/, off, s33 offset:3524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v993*/, off, s33 offset:3528 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x1d ; 120-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v994*/, off, s33 offset:3532
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v995*/, off, s33 offset:3536
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v996*/, off, s33 offset:3540
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v997*/, off, s33 offset:3544
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v998*/, off, s33 offset:3548
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v999*/, off, s33 offset:3552
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v1000*/, off, s33 offset:3556
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v1001*/, off, s33 offset:3560
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v1002*/, off, s33 offset:3564
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v1003*/, off, s33 offset:3568
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v1004*/, off, s33 offset:3572
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v1005*/, off, s33 offset:3576
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v1006*/, off, s33 offset:3580
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v1007*/, off, s33 offset:3584
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v1008*/, off, s33 offset:3588
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v1009*/, off, s33 offset:3592
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v1010*/, off, s33 offset:3596
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v1011*/, off, s33 offset:3600
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v1012*/, off, s33 offset:3604
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v1013*/, off, s33 offset:3608
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v1014*/, off, s33 offset:3612
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v1015*/, off, s33 offset:3616
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v1016*/, off, s33 offset:3620
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v1017*/, off, s33 offset:3624
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v1018*/, off, s33 offset:3628
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v1019*/, off, s33 offset:3632
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v1020*/, off, s33 offset:3636
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v1021*/, off, s33 offset:3640
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v1022*/, off, s33 offset:3644
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v1023*/, off, s33 offset:3648
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v994*/, off, s33 offset:3532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v995*/, off, s33 offset:3536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v996*/, off, s33 offset:3540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v997*/, off, s33 offset:3544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v998*/, off, s33 offset:3548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v999*/, off, s33 offset:3552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v1000*/, off, s33 offset:3556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v1001*/, off, s33 offset:3560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v1002*/, off, s33 offset:3564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v1003*/, off, s33 offset:3568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v1004*/, off, s33 offset:3572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v1005*/, off, s33 offset:3576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v1006*/, off, s33 offset:3580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v1007*/, off, s33 offset:3584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v1008*/, off, s33 offset:3588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v1009*/, off, s33 offset:3592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v1010*/, off, s33 offset:3596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v1011*/, off, s33 offset:3600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v1012*/, off, s33 offset:3604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v1013*/, off, s33 offset:3608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v1014*/, off, s33 offset:3612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v1015*/, off, s33 offset:3616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v1016*/, off, s33 offset:3620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v1017*/, off, s33 offset:3624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v1018*/, off, s33 offset:3628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v1019*/, off, s33 offset:3632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v1020*/, off, s33 offset:3636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v1021*/, off, s33 offset:3640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v1022*/, off, s33 offset:3644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v1023*/, off, s33 offset:3648 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, s4
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 s33, s0
@@ -5939,935 +5939,935 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ
 ; GFX1250-DAGISEL-NEXT:    s_wait_kmcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 s0, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2, s32 offset:8
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3, s32 offset:12
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4, s32 offset:16
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5, s32 offset:20
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6, s32 offset:24
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7, s32 offset:28
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8, s32 offset:32
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9, s32 offset:36
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10, s32 offset:40
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11, s32 offset:44
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12, s32 offset:48
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13, s32 offset:52
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14, s32 offset:56
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15, s32 offset:60
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16, s32 offset:64
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17, s32 offset:68
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18, s32 offset:72
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19, s32 offset:76
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20, s32 offset:80
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21, s32 offset:84
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22, s32 offset:88
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23, s32 offset:92
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24, s32 offset:96
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25, s32 offset:100
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26, s32 offset:104
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27, s32 offset:108
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28, s32 offset:112
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29, s32 offset:116
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30, s32 offset:120
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31, s32 offset:124
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32, s32 offset:128
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33, s32 offset:132
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34, s32 offset:136
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35, s32 offset:140
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36, s32 offset:144
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37, s32 offset:148
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38, s32 offset:152
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39, s32 offset:156
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48, s32 offset:160
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49, s32 offset:164
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50, s32 offset:168
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51, s32 offset:172
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52, s32 offset:176
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53, s32 offset:180
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54, s32 offset:184
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55, s32 offset:188
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64, s32 offset:192
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65, s32 offset:196
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66, s32 offset:200
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67, s32 offset:204
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68, s32 offset:208
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69, s32 offset:212
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70, s32 offset:216
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71, s32 offset:220
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80, s32 offset:224
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81, s32 offset:228
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82, s32 offset:232
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83, s32 offset:236
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84, s32 offset:240
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85, s32 offset:244
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86, s32 offset:248
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s32 offset:4 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2, s32 offset:8 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3, s32 offset:12 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4, s32 offset:16 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5, s32 offset:20 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6, s32 offset:24 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7, s32 offset:28 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8, s32 offset:32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9, s32 offset:36 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10, s32 offset:40 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11, s32 offset:44 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12, s32 offset:48 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13, s32 offset:52 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14, s32 offset:56 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15, s32 offset:60 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16, s32 offset:64 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17, s32 offset:68 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18, s32 offset:72 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19, s32 offset:76 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20, s32 offset:80 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21, s32 offset:84 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22, s32 offset:88 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23, s32 offset:92 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24, s32 offset:96 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25, s32 offset:100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26, s32 offset:104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27, s32 offset:108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28, s32 offset:112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29, s32 offset:116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30, s32 offset:120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31, s32 offset:124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32, s32 offset:128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33, s32 offset:132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34, s32 offset:136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35, s32 offset:140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36, s32 offset:144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37, s32 offset:148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38, s32 offset:152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39, s32 offset:156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48, s32 offset:160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49, s32 offset:164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50, s32 offset:168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51, s32 offset:172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52, s32 offset:176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53, s32 offset:180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54, s32 offset:184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55, s32 offset:188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64, s32 offset:192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65, s32 offset:196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66, s32 offset:200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67, s32 offset:204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68, s32 offset:208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69, s32 offset:212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70, s32 offset:216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71, s32 offset:220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80, s32 offset:224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81, s32 offset:228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82, s32 offset:232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83, s32 offset:236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84, s32 offset:240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85, s32 offset:244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86, s32 offset:248 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87, s32 offset:252
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96, s32 offset:256
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97, s32 offset:260
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98, s32 offset:264
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99, s32 offset:268
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100, s32 offset:272
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101, s32 offset:276
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102, s32 offset:280
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103, s32 offset:284
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112, s32 offset:288
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113, s32 offset:292
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114, s32 offset:296
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115, s32 offset:300
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116, s32 offset:304
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117, s32 offset:308
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118, s32 offset:312
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119, s32 offset:316
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128, s32 offset:320
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129, s32 offset:324
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130, s32 offset:328
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131, s32 offset:332
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132, s32 offset:336
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133, s32 offset:340
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134, s32 offset:344
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135, s32 offset:348
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144, s32 offset:352
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145, s32 offset:356
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146, s32 offset:360
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147, s32 offset:364
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148, s32 offset:368
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149, s32 offset:372
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150, s32 offset:376
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151, s32 offset:380
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160, s32 offset:384
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161, s32 offset:388
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162, s32 offset:392
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163, s32 offset:396
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164, s32 offset:400
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165, s32 offset:404
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166, s32 offset:408
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167, s32 offset:412
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176, s32 offset:416
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177, s32 offset:420
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178, s32 offset:424
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179, s32 offset:428
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180, s32 offset:432
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181, s32 offset:436
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182, s32 offset:440
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183, s32 offset:444
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192, s32 offset:448
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193, s32 offset:452
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194, s32 offset:456
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195, s32 offset:460
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196, s32 offset:464
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197, s32 offset:468
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198, s32 offset:472
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199, s32 offset:476
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208, s32 offset:480
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209, s32 offset:484
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210, s32 offset:488
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211, s32 offset:492
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212, s32 offset:496
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213, s32 offset:500
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87, s32 offset:252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96, s32 offset:256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97, s32 offset:260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98, s32 offset:264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99, s32 offset:268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100, s32 offset:272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101, s32 offset:276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102, s32 offset:280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103, s32 offset:284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112, s32 offset:288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113, s32 offset:292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114, s32 offset:296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115, s32 offset:300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116, s32 offset:304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117, s32 offset:308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118, s32 offset:312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119, s32 offset:316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128, s32 offset:320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129, s32 offset:324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130, s32 offset:328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131, s32 offset:332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132, s32 offset:336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133, s32 offset:340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134, s32 offset:344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135, s32 offset:348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144, s32 offset:352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145, s32 offset:356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146, s32 offset:360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147, s32 offset:364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148, s32 offset:368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149, s32 offset:372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150, s32 offset:376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151, s32 offset:380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160, s32 offset:384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161, s32 offset:388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162, s32 offset:392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163, s32 offset:396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164, s32 offset:400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165, s32 offset:404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166, s32 offset:408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167, s32 offset:412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176, s32 offset:416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177, s32 offset:420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178, s32 offset:424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179, s32 offset:428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180, s32 offset:432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181, s32 offset:436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182, s32 offset:440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183, s32 offset:444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192, s32 offset:448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193, s32 offset:452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194, s32 offset:456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195, s32 offset:460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196, s32 offset:464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197, s32 offset:468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198, s32 offset:472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199, s32 offset:476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208, s32 offset:480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209, s32 offset:484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210, s32 offset:488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211, s32 offset:492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212, s32 offset:496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213, s32 offset:500 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214, s32 offset:504
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215, s32 offset:508
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224, s32 offset:512
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225, s32 offset:516
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226, s32 offset:520
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227, s32 offset:524
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228, s32 offset:528
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229, s32 offset:532
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230, s32 offset:536
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231, s32 offset:540
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240, s32 offset:544
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241, s32 offset:548
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242, s32 offset:552
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243, s32 offset:556
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244, s32 offset:560
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245, s32 offset:564
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246, s32 offset:568
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247, s32 offset:572
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214, s32 offset:504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215, s32 offset:508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224, s32 offset:512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225, s32 offset:516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226, s32 offset:520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227, s32 offset:524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228, s32 offset:528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229, s32 offset:532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230, s32 offset:536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231, s32 offset:540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240, s32 offset:544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241, s32 offset:548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242, s32 offset:552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243, s32 offset:556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244, s32 offset:560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245, s32 offset:564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246, s32 offset:568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247, s32 offset:572 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 4 ; msbs: dst=0 src0=0 src1=1 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v256*/, s32 offset:576
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v257*/, s32 offset:580
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v258*/, s32 offset:584
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v259*/, s32 offset:588
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v260*/, s32 offset:592
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v261*/, s32 offset:596
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v262*/, s32 offset:600
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v263*/, s32 offset:604
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v264*/, s32 offset:608
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v265*/, s32 offset:612
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v266*/, s32 offset:616
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v267*/, s32 offset:620
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v268*/, s32 offset:624
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v269*/, s32 offset:628
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v270*/, s32 offset:632
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v271*/, s32 offset:636
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v272*/, s32 offset:640
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v273*/, s32 offset:644
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v274*/, s32 offset:648
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v275*/, s32 offset:652
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v276*/, s32 offset:656
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v277*/, s32 offset:660
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v278*/, s32 offset:664
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v279*/, s32 offset:668
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v280*/, s32 offset:672
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v281*/, s32 offset:676
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v282*/, s32 offset:680
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v283*/, s32 offset:684
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v284*/, s32 offset:688
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v285*/, s32 offset:692
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v286*/, s32 offset:696
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v287*/, s32 offset:700
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v288*/, s32 offset:704
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v289*/, s32 offset:708
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v290*/, s32 offset:712
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v291*/, s32 offset:716
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v292*/, s32 offset:720
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v293*/, s32 offset:724
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v294*/, s32 offset:728
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v295*/, s32 offset:732
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v296*/, s32 offset:736
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v297*/, s32 offset:740
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v298*/, s32 offset:744
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v299*/, s32 offset:748
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v300*/, s32 offset:752
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v256*/, s32 offset:576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v257*/, s32 offset:580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v258*/, s32 offset:584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v259*/, s32 offset:588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v260*/, s32 offset:592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v261*/, s32 offset:596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v262*/, s32 offset:600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v263*/, s32 offset:604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v264*/, s32 offset:608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v265*/, s32 offset:612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v266*/, s32 offset:616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v267*/, s32 offset:620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v268*/, s32 offset:624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v269*/, s32 offset:628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v270*/, s32 offset:632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v271*/, s32 offset:636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v272*/, s32 offset:640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v273*/, s32 offset:644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v274*/, s32 offset:648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v275*/, s32 offset:652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v276*/, s32 offset:656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v277*/, s32 offset:660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v278*/, s32 offset:664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v279*/, s32 offset:668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v280*/, s32 offset:672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v281*/, s32 offset:676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v282*/, s32 offset:680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v283*/, s32 offset:684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v284*/, s32 offset:688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v285*/, s32 offset:692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v286*/, s32 offset:696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v287*/, s32 offset:700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v288*/, s32 offset:704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v289*/, s32 offset:708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v290*/, s32 offset:712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v291*/, s32 offset:716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v292*/, s32 offset:720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v293*/, s32 offset:724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v294*/, s32 offset:728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v295*/, s32 offset:732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v296*/, s32 offset:736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v297*/, s32 offset:740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v298*/, s32 offset:744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v299*/, s32 offset:748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v300*/, s32 offset:752 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v301*/, s32 offset:756
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v302*/, s32 offset:760
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v303*/, s32 offset:764
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v304*/, s32 offset:768
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v305*/, s32 offset:772
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v306*/, s32 offset:776
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v307*/, s32 offset:780
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v308*/, s32 offset:784
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v309*/, s32 offset:788
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v310*/, s32 offset:792
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v311*/, s32 offset:796
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v312*/, s32 offset:800
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v313*/, s32 offset:804
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v314*/, s32 offset:808
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v315*/, s32 offset:812
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v316*/, s32 offset:816
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v317*/, s32 offset:820
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v318*/, s32 offset:824
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v319*/, s32 offset:828
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v320*/, s32 offset:832
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v321*/, s32 offset:836
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v322*/, s32 offset:840
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v323*/, s32 offset:844
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v324*/, s32 offset:848
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v325*/, s32 offset:852
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v326*/, s32 offset:856
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v327*/, s32 offset:860
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v328*/, s32 offset:864
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v329*/, s32 offset:868
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v330*/, s32 offset:872
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v331*/, s32 offset:876
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v332*/, s32 offset:880
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v333*/, s32 offset:884
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v334*/, s32 offset:888
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v335*/, s32 offset:892
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v336*/, s32 offset:896
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v337*/, s32 offset:900
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v338*/, s32 offset:904
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v339*/, s32 offset:908
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v340*/, s32 offset:912
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v341*/, s32 offset:916
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v342*/, s32 offset:920
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v343*/, s32 offset:924
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v344*/, s32 offset:928
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v345*/, s32 offset:932
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v346*/, s32 offset:936
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v347*/, s32 offset:940
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v348*/, s32 offset:944
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v349*/, s32 offset:948
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v350*/, s32 offset:952
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v351*/, s32 offset:956
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v352*/, s32 offset:960
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v353*/, s32 offset:964
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v354*/, s32 offset:968
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v355*/, s32 offset:972
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v356*/, s32 offset:976
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v357*/, s32 offset:980
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v358*/, s32 offset:984
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v359*/, s32 offset:988
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v360*/, s32 offset:992
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v361*/, s32 offset:996
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v362*/, s32 offset:1000
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v363*/, s32 offset:1004
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v301*/, s32 offset:756 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v302*/, s32 offset:760 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v303*/, s32 offset:764 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v304*/, s32 offset:768 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v305*/, s32 offset:772 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v306*/, s32 offset:776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v307*/, s32 offset:780 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v308*/, s32 offset:784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v309*/, s32 offset:788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v310*/, s32 offset:792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v311*/, s32 offset:796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v312*/, s32 offset:800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v313*/, s32 offset:804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v314*/, s32 offset:808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v315*/, s32 offset:812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v316*/, s32 offset:816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v317*/, s32 offset:820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v318*/, s32 offset:824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v319*/, s32 offset:828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v320*/, s32 offset:832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v321*/, s32 offset:836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v322*/, s32 offset:840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v323*/, s32 offset:844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v324*/, s32 offset:848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v325*/, s32 offset:852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v326*/, s32 offset:856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v327*/, s32 offset:860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v328*/, s32 offset:864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v329*/, s32 offset:868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v330*/, s32 offset:872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v331*/, s32 offset:876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v332*/, s32 offset:880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v333*/, s32 offset:884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v334*/, s32 offset:888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v335*/, s32 offset:892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v336*/, s32 offset:896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v337*/, s32 offset:900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v338*/, s32 offset:904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v339*/, s32 offset:908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v340*/, s32 offset:912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v341*/, s32 offset:916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v342*/, s32 offset:920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v343*/, s32 offset:924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v344*/, s32 offset:928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v345*/, s32 offset:932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v346*/, s32 offset:936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v347*/, s32 offset:940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v348*/, s32 offset:944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v349*/, s32 offset:948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v350*/, s32 offset:952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v351*/, s32 offset:956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v352*/, s32 offset:960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v353*/, s32 offset:964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v354*/, s32 offset:968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v355*/, s32 offset:972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v356*/, s32 offset:976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v357*/, s32 offset:980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v358*/, s32 offset:984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v359*/, s32 offset:988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v360*/, s32 offset:992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v361*/, s32 offset:996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v362*/, s32 offset:1000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v363*/, s32 offset:1004 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v364*/, s32 offset:1008
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v365*/, s32 offset:1012
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v366*/, s32 offset:1016
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v367*/, s32 offset:1020
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v368*/, s32 offset:1024
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v369*/, s32 offset:1028
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v370*/, s32 offset:1032
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v371*/, s32 offset:1036
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v372*/, s32 offset:1040
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v373*/, s32 offset:1044
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v374*/, s32 offset:1048
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v375*/, s32 offset:1052
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v376*/, s32 offset:1056
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v377*/, s32 offset:1060
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v378*/, s32 offset:1064
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v379*/, s32 offset:1068
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v380*/, s32 offset:1072
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v381*/, s32 offset:1076
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v382*/, s32 offset:1080
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v383*/, s32 offset:1084
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v384*/, s32 offset:1088
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v385*/, s32 offset:1092
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v386*/, s32 offset:1096
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v387*/, s32 offset:1100
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v388*/, s32 offset:1104
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v389*/, s32 offset:1108
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v390*/, s32 offset:1112
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v391*/, s32 offset:1116
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v392*/, s32 offset:1120
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v393*/, s32 offset:1124
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v394*/, s32 offset:1128
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v395*/, s32 offset:1132
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v396*/, s32 offset:1136
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v397*/, s32 offset:1140
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v398*/, s32 offset:1144
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v399*/, s32 offset:1148
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v400*/, s32 offset:1152
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v401*/, s32 offset:1156
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v402*/, s32 offset:1160
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v403*/, s32 offset:1164
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v404*/, s32 offset:1168
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v405*/, s32 offset:1172
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v406*/, s32 offset:1176
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v407*/, s32 offset:1180
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v408*/, s32 offset:1184
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v409*/, s32 offset:1188
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v410*/, s32 offset:1192
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v411*/, s32 offset:1196
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v412*/, s32 offset:1200
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v413*/, s32 offset:1204
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v414*/, s32 offset:1208
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v415*/, s32 offset:1212
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v416*/, s32 offset:1216
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v417*/, s32 offset:1220
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v418*/, s32 offset:1224
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v419*/, s32 offset:1228
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v420*/, s32 offset:1232
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v421*/, s32 offset:1236
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v422*/, s32 offset:1240
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v423*/, s32 offset:1244
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v424*/, s32 offset:1248
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v425*/, s32 offset:1252
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v426*/, s32 offset:1256
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v364*/, s32 offset:1008 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v365*/, s32 offset:1012 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v366*/, s32 offset:1016 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v367*/, s32 offset:1020 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v368*/, s32 offset:1024 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v369*/, s32 offset:1028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v370*/, s32 offset:1032 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v371*/, s32 offset:1036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v372*/, s32 offset:1040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v373*/, s32 offset:1044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v374*/, s32 offset:1048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v375*/, s32 offset:1052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v376*/, s32 offset:1056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v377*/, s32 offset:1060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v378*/, s32 offset:1064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v379*/, s32 offset:1068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v380*/, s32 offset:1072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v381*/, s32 offset:1076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v382*/, s32 offset:1080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v383*/, s32 offset:1084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v384*/, s32 offset:1088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v385*/, s32 offset:1092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v386*/, s32 offset:1096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v387*/, s32 offset:1100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v388*/, s32 offset:1104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v389*/, s32 offset:1108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v390*/, s32 offset:1112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v391*/, s32 offset:1116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v392*/, s32 offset:1120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v393*/, s32 offset:1124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v394*/, s32 offset:1128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v395*/, s32 offset:1132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v396*/, s32 offset:1136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v397*/, s32 offset:1140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v398*/, s32 offset:1144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v399*/, s32 offset:1148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v400*/, s32 offset:1152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v401*/, s32 offset:1156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v402*/, s32 offset:1160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v403*/, s32 offset:1164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v404*/, s32 offset:1168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v405*/, s32 offset:1172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v406*/, s32 offset:1176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v407*/, s32 offset:1180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v408*/, s32 offset:1184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v409*/, s32 offset:1188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v410*/, s32 offset:1192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v411*/, s32 offset:1196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v412*/, s32 offset:1200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v413*/, s32 offset:1204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v414*/, s32 offset:1208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v415*/, s32 offset:1212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v416*/, s32 offset:1216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v417*/, s32 offset:1220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v418*/, s32 offset:1224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v419*/, s32 offset:1228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v420*/, s32 offset:1232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v421*/, s32 offset:1236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v422*/, s32 offset:1240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v423*/, s32 offset:1244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v424*/, s32 offset:1248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v425*/, s32 offset:1252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v426*/, s32 offset:1256 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v427*/, s32 offset:1260
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v428*/, s32 offset:1264
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v429*/, s32 offset:1268
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v430*/, s32 offset:1272
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v431*/, s32 offset:1276
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v432*/, s32 offset:1280
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v433*/, s32 offset:1284
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v434*/, s32 offset:1288
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v435*/, s32 offset:1292
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v436*/, s32 offset:1296
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v437*/, s32 offset:1300
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v438*/, s32 offset:1304
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v439*/, s32 offset:1308
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v440*/, s32 offset:1312
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v441*/, s32 offset:1316
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v442*/, s32 offset:1320
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v443*/, s32 offset:1324
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v444*/, s32 offset:1328
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v445*/, s32 offset:1332
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v446*/, s32 offset:1336
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v447*/, s32 offset:1340
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v448*/, s32 offset:1344
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v449*/, s32 offset:1348
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v450*/, s32 offset:1352
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v451*/, s32 offset:1356
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v452*/, s32 offset:1360
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v453*/, s32 offset:1364
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v454*/, s32 offset:1368
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v455*/, s32 offset:1372
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v456*/, s32 offset:1376
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v457*/, s32 offset:1380
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v458*/, s32 offset:1384
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v459*/, s32 offset:1388
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v460*/, s32 offset:1392
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v461*/, s32 offset:1396
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v462*/, s32 offset:1400
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v463*/, s32 offset:1404
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v464*/, s32 offset:1408
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v465*/, s32 offset:1412
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v466*/, s32 offset:1416
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v467*/, s32 offset:1420
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v468*/, s32 offset:1424
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v469*/, s32 offset:1428
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v470*/, s32 offset:1432
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v471*/, s32 offset:1436
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v472*/, s32 offset:1440
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v473*/, s32 offset:1444
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v474*/, s32 offset:1448
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v475*/, s32 offset:1452
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v476*/, s32 offset:1456
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v477*/, s32 offset:1460
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v478*/, s32 offset:1464
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v479*/, s32 offset:1468
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v480*/, s32 offset:1472
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v481*/, s32 offset:1476
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v482*/, s32 offset:1480
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v483*/, s32 offset:1484
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v484*/, s32 offset:1488
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v485*/, s32 offset:1492
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v486*/, s32 offset:1496
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v487*/, s32 offset:1500
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v488*/, s32 offset:1504
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v489*/, s32 offset:1508
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v427*/, s32 offset:1260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v428*/, s32 offset:1264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v429*/, s32 offset:1268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v430*/, s32 offset:1272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v431*/, s32 offset:1276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v432*/, s32 offset:1280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v433*/, s32 offset:1284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v434*/, s32 offset:1288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v435*/, s32 offset:1292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v436*/, s32 offset:1296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v437*/, s32 offset:1300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v438*/, s32 offset:1304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v439*/, s32 offset:1308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v440*/, s32 offset:1312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v441*/, s32 offset:1316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v442*/, s32 offset:1320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v443*/, s32 offset:1324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v444*/, s32 offset:1328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v445*/, s32 offset:1332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v446*/, s32 offset:1336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v447*/, s32 offset:1340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v448*/, s32 offset:1344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v449*/, s32 offset:1348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v450*/, s32 offset:1352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v451*/, s32 offset:1356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v452*/, s32 offset:1360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v453*/, s32 offset:1364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v454*/, s32 offset:1368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v455*/, s32 offset:1372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v456*/, s32 offset:1376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v457*/, s32 offset:1380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v458*/, s32 offset:1384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v459*/, s32 offset:1388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v460*/, s32 offset:1392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v461*/, s32 offset:1396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v462*/, s32 offset:1400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v463*/, s32 offset:1404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v464*/, s32 offset:1408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v465*/, s32 offset:1412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v466*/, s32 offset:1416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v467*/, s32 offset:1420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v468*/, s32 offset:1424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v469*/, s32 offset:1428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v470*/, s32 offset:1432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v471*/, s32 offset:1436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v472*/, s32 offset:1440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v473*/, s32 offset:1444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v474*/, s32 offset:1448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v475*/, s32 offset:1452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v476*/, s32 offset:1456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v477*/, s32 offset:1460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v478*/, s32 offset:1464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v479*/, s32 offset:1468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v480*/, s32 offset:1472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v481*/, s32 offset:1476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v482*/, s32 offset:1480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v483*/, s32 offset:1484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v484*/, s32 offset:1488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v485*/, s32 offset:1492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v486*/, s32 offset:1496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v487*/, s32 offset:1500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v488*/, s32 offset:1504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v489*/, s32 offset:1508 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v490*/, s32 offset:1512
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v491*/, s32 offset:1516
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v492*/, s32 offset:1520
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v493*/, s32 offset:1524
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v494*/, s32 offset:1528
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v495*/, s32 offset:1532
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v496*/, s32 offset:1536
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v497*/, s32 offset:1540
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v498*/, s32 offset:1544
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v499*/, s32 offset:1548
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v500*/, s32 offset:1552
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v501*/, s32 offset:1556
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v502*/, s32 offset:1560
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v503*/, s32 offset:1564
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v504*/, s32 offset:1568
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v505*/, s32 offset:1572
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v506*/, s32 offset:1576
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v507*/, s32 offset:1580
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v508*/, s32 offset:1584
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v509*/, s32 offset:1588
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v510*/, s32 offset:1592
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v511*/, s32 offset:1596
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v490*/, s32 offset:1512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v491*/, s32 offset:1516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v492*/, s32 offset:1520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v493*/, s32 offset:1524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v494*/, s32 offset:1528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v495*/, s32 offset:1532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v496*/, s32 offset:1536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v497*/, s32 offset:1540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v498*/, s32 offset:1544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v499*/, s32 offset:1548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v500*/, s32 offset:1552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v501*/, s32 offset:1556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v502*/, s32 offset:1560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v503*/, s32 offset:1564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v504*/, s32 offset:1568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v505*/, s32 offset:1572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v506*/, s32 offset:1576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v507*/, s32 offset:1580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v508*/, s32 offset:1584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v509*/, s32 offset:1588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v510*/, s32 offset:1592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v511*/, s32 offset:1596 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x408 ; msbs: dst=0 src0=0 src1=2 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v512*/, s32 offset:1600
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v513*/, s32 offset:1604
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v514*/, s32 offset:1608
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v515*/, s32 offset:1612
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v516*/, s32 offset:1616
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v517*/, s32 offset:1620
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v518*/, s32 offset:1624
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v519*/, s32 offset:1628
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v520*/, s32 offset:1632
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v521*/, s32 offset:1636
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v522*/, s32 offset:1640
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v523*/, s32 offset:1644
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v524*/, s32 offset:1648
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v525*/, s32 offset:1652
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v526*/, s32 offset:1656
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v527*/, s32 offset:1660
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v528*/, s32 offset:1664
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v529*/, s32 offset:1668
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v530*/, s32 offset:1672
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v531*/, s32 offset:1676
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v532*/, s32 offset:1680
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v533*/, s32 offset:1684
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v534*/, s32 offset:1688
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v535*/, s32 offset:1692
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v536*/, s32 offset:1696
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v537*/, s32 offset:1700
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v538*/, s32 offset:1704
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v539*/, s32 offset:1708
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v540*/, s32 offset:1712
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v541*/, s32 offset:1716
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v542*/, s32 offset:1720
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v543*/, s32 offset:1724
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v544*/, s32 offset:1728
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v545*/, s32 offset:1732
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v546*/, s32 offset:1736
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v547*/, s32 offset:1740
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v548*/, s32 offset:1744
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v549*/, s32 offset:1748
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v550*/, s32 offset:1752
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v551*/, s32 offset:1756
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v552*/, s32 offset:1760
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v512*/, s32 offset:1600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v513*/, s32 offset:1604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v514*/, s32 offset:1608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v515*/, s32 offset:1612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v516*/, s32 offset:1616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v517*/, s32 offset:1620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v518*/, s32 offset:1624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v519*/, s32 offset:1628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v520*/, s32 offset:1632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v521*/, s32 offset:1636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v522*/, s32 offset:1640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v523*/, s32 offset:1644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v524*/, s32 offset:1648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v525*/, s32 offset:1652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v526*/, s32 offset:1656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v527*/, s32 offset:1660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v528*/, s32 offset:1664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v529*/, s32 offset:1668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v530*/, s32 offset:1672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v531*/, s32 offset:1676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v532*/, s32 offset:1680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v533*/, s32 offset:1684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v534*/, s32 offset:1688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v535*/, s32 offset:1692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v536*/, s32 offset:1696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v537*/, s32 offset:1700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v538*/, s32 offset:1704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v539*/, s32 offset:1708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v540*/, s32 offset:1712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v541*/, s32 offset:1716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v542*/, s32 offset:1720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v543*/, s32 offset:1724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v544*/, s32 offset:1728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v545*/, s32 offset:1732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v546*/, s32 offset:1736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v547*/, s32 offset:1740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v548*/, s32 offset:1744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v549*/, s32 offset:1748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v550*/, s32 offset:1752 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v551*/, s32 offset:1756 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v552*/, s32 offset:1760 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v553*/, s32 offset:1764
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v554*/, s32 offset:1768
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v555*/, s32 offset:1772
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v556*/, s32 offset:1776
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v557*/, s32 offset:1780
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v558*/, s32 offset:1784
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v559*/, s32 offset:1788
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v560*/, s32 offset:1792
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v561*/, s32 offset:1796
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v562*/, s32 offset:1800
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v563*/, s32 offset:1804
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v564*/, s32 offset:1808
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v565*/, s32 offset:1812
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v566*/, s32 offset:1816
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v567*/, s32 offset:1820
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v568*/, s32 offset:1824
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v569*/, s32 offset:1828
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v570*/, s32 offset:1832
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v571*/, s32 offset:1836
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v572*/, s32 offset:1840
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v573*/, s32 offset:1844
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v574*/, s32 offset:1848
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v575*/, s32 offset:1852
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v576*/, s32 offset:1856
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v577*/, s32 offset:1860
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v578*/, s32 offset:1864
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v579*/, s32 offset:1868
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v580*/, s32 offset:1872
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v581*/, s32 offset:1876
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v582*/, s32 offset:1880
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v583*/, s32 offset:1884
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v584*/, s32 offset:1888
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v585*/, s32 offset:1892
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v586*/, s32 offset:1896
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v587*/, s32 offset:1900
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v588*/, s32 offset:1904
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v589*/, s32 offset:1908
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v590*/, s32 offset:1912
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v591*/, s32 offset:1916
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v592*/, s32 offset:1920
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v593*/, s32 offset:1924
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v594*/, s32 offset:1928
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v595*/, s32 offset:1932
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v596*/, s32 offset:1936
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v597*/, s32 offset:1940
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v598*/, s32 offset:1944
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v599*/, s32 offset:1948
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v600*/, s32 offset:1952
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v601*/, s32 offset:1956
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v602*/, s32 offset:1960
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v603*/, s32 offset:1964
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v604*/, s32 offset:1968
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v605*/, s32 offset:1972
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v606*/, s32 offset:1976
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v607*/, s32 offset:1980
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v608*/, s32 offset:1984
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v609*/, s32 offset:1988
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v610*/, s32 offset:1992
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v611*/, s32 offset:1996
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v612*/, s32 offset:2000
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v613*/, s32 offset:2004
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v614*/, s32 offset:2008
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v615*/, s32 offset:2012
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v553*/, s32 offset:1764 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v554*/, s32 offset:1768 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v555*/, s32 offset:1772 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v556*/, s32 offset:1776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v557*/, s32 offset:1780 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v558*/, s32 offset:1784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v559*/, s32 offset:1788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v560*/, s32 offset:1792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v561*/, s32 offset:1796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v562*/, s32 offset:1800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v563*/, s32 offset:1804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v564*/, s32 offset:1808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v565*/, s32 offset:1812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v566*/, s32 offset:1816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v567*/, s32 offset:1820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v568*/, s32 offset:1824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v569*/, s32 offset:1828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v570*/, s32 offset:1832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v571*/, s32 offset:1836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v572*/, s32 offset:1840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v573*/, s32 offset:1844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v574*/, s32 offset:1848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v575*/, s32 offset:1852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v576*/, s32 offset:1856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v577*/, s32 offset:1860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v578*/, s32 offset:1864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v579*/, s32 offset:1868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v580*/, s32 offset:1872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v581*/, s32 offset:1876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v582*/, s32 offset:1880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v583*/, s32 offset:1884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v584*/, s32 offset:1888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v585*/, s32 offset:1892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v586*/, s32 offset:1896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v587*/, s32 offset:1900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v588*/, s32 offset:1904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v589*/, s32 offset:1908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v590*/, s32 offset:1912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v591*/, s32 offset:1916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v592*/, s32 offset:1920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v593*/, s32 offset:1924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v594*/, s32 offset:1928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v595*/, s32 offset:1932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v596*/, s32 offset:1936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v597*/, s32 offset:1940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v598*/, s32 offset:1944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v599*/, s32 offset:1948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v600*/, s32 offset:1952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v601*/, s32 offset:1956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v602*/, s32 offset:1960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v603*/, s32 offset:1964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v604*/, s32 offset:1968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v605*/, s32 offset:1972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v606*/, s32 offset:1976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v607*/, s32 offset:1980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v608*/, s32 offset:1984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v609*/, s32 offset:1988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v610*/, s32 offset:1992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v611*/, s32 offset:1996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v612*/, s32 offset:2000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v613*/, s32 offset:2004 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v614*/, s32 offset:2008 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v615*/, s32 offset:2012 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v616*/, s32 offset:2016
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v617*/, s32 offset:2020
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v618*/, s32 offset:2024
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v619*/, s32 offset:2028
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v620*/, s32 offset:2032
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v621*/, s32 offset:2036
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v622*/, s32 offset:2040
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v623*/, s32 offset:2044
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v624*/, s32 offset:2048
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v625*/, s32 offset:2052
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v626*/, s32 offset:2056
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v627*/, s32 offset:2060
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v628*/, s32 offset:2064
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v629*/, s32 offset:2068
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v630*/, s32 offset:2072
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v631*/, s32 offset:2076
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v632*/, s32 offset:2080
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v633*/, s32 offset:2084
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v634*/, s32 offset:2088
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v635*/, s32 offset:2092
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v636*/, s32 offset:2096
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v637*/, s32 offset:2100
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v638*/, s32 offset:2104
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v639*/, s32 offset:2108
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v640*/, s32 offset:2112
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v641*/, s32 offset:2116
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v642*/, s32 offset:2120
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v643*/, s32 offset:2124
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v644*/, s32 offset:2128
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v645*/, s32 offset:2132
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v646*/, s32 offset:2136
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v647*/, s32 offset:2140
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v648*/, s32 offset:2144
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v649*/, s32 offset:2148
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v650*/, s32 offset:2152
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v651*/, s32 offset:2156
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v652*/, s32 offset:2160
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v653*/, s32 offset:2164
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v654*/, s32 offset:2168
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v655*/, s32 offset:2172
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v656*/, s32 offset:2176
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v657*/, s32 offset:2180
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v658*/, s32 offset:2184
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v659*/, s32 offset:2188
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v660*/, s32 offset:2192
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v661*/, s32 offset:2196
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v662*/, s32 offset:2200
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v663*/, s32 offset:2204
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v664*/, s32 offset:2208
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v665*/, s32 offset:2212
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v666*/, s32 offset:2216
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v667*/, s32 offset:2220
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v668*/, s32 offset:2224
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v669*/, s32 offset:2228
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v670*/, s32 offset:2232
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v671*/, s32 offset:2236
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v672*/, s32 offset:2240
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v673*/, s32 offset:2244
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v674*/, s32 offset:2248
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v675*/, s32 offset:2252
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v676*/, s32 offset:2256
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v677*/, s32 offset:2260
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v678*/, s32 offset:2264
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v616*/, s32 offset:2016 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v617*/, s32 offset:2020 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v618*/, s32 offset:2024 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v619*/, s32 offset:2028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v620*/, s32 offset:2032 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v621*/, s32 offset:2036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v622*/, s32 offset:2040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v623*/, s32 offset:2044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v624*/, s32 offset:2048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v625*/, s32 offset:2052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v626*/, s32 offset:2056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v627*/, s32 offset:2060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v628*/, s32 offset:2064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v629*/, s32 offset:2068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v630*/, s32 offset:2072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v631*/, s32 offset:2076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v632*/, s32 offset:2080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v633*/, s32 offset:2084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v634*/, s32 offset:2088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v635*/, s32 offset:2092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v636*/, s32 offset:2096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v637*/, s32 offset:2100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v638*/, s32 offset:2104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v639*/, s32 offset:2108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v640*/, s32 offset:2112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v641*/, s32 offset:2116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v642*/, s32 offset:2120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v643*/, s32 offset:2124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v644*/, s32 offset:2128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v645*/, s32 offset:2132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v646*/, s32 offset:2136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v647*/, s32 offset:2140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v648*/, s32 offset:2144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v649*/, s32 offset:2148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v650*/, s32 offset:2152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v651*/, s32 offset:2156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v652*/, s32 offset:2160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v653*/, s32 offset:2164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v654*/, s32 offset:2168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v655*/, s32 offset:2172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v656*/, s32 offset:2176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v657*/, s32 offset:2180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v658*/, s32 offset:2184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v659*/, s32 offset:2188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v660*/, s32 offset:2192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v661*/, s32 offset:2196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v662*/, s32 offset:2200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v663*/, s32 offset:2204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v664*/, s32 offset:2208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v665*/, s32 offset:2212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v666*/, s32 offset:2216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v667*/, s32 offset:2220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v668*/, s32 offset:2224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v669*/, s32 offset:2228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v670*/, s32 offset:2232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v671*/, s32 offset:2236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v672*/, s32 offset:2240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v673*/, s32 offset:2244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v674*/, s32 offset:2248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v675*/, s32 offset:2252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v676*/, s32 offset:2256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v677*/, s32 offset:2260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v678*/, s32 offset:2264 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v679*/, s32 offset:2268
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v680*/, s32 offset:2272
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v681*/, s32 offset:2276
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v682*/, s32 offset:2280
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v683*/, s32 offset:2284
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v684*/, s32 offset:2288
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v685*/, s32 offset:2292
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v686*/, s32 offset:2296
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v687*/, s32 offset:2300
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v688*/, s32 offset:2304
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v689*/, s32 offset:2308
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v690*/, s32 offset:2312
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v691*/, s32 offset:2316
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v692*/, s32 offset:2320
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v693*/, s32 offset:2324
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v694*/, s32 offset:2328
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v695*/, s32 offset:2332
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v696*/, s32 offset:2336
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v697*/, s32 offset:2340
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v698*/, s32 offset:2344
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v699*/, s32 offset:2348
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v700*/, s32 offset:2352
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v701*/, s32 offset:2356
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v702*/, s32 offset:2360
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v703*/, s32 offset:2364
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v704*/, s32 offset:2368
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v705*/, s32 offset:2372
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v706*/, s32 offset:2376
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v707*/, s32 offset:2380
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v708*/, s32 offset:2384
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v709*/, s32 offset:2388
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v710*/, s32 offset:2392
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v711*/, s32 offset:2396
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v712*/, s32 offset:2400
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v713*/, s32 offset:2404
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v714*/, s32 offset:2408
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v715*/, s32 offset:2412
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v716*/, s32 offset:2416
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v717*/, s32 offset:2420
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v718*/, s32 offset:2424
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v719*/, s32 offset:2428
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v720*/, s32 offset:2432
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v721*/, s32 offset:2436
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v722*/, s32 offset:2440
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v723*/, s32 offset:2444
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v724*/, s32 offset:2448
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v725*/, s32 offset:2452
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v726*/, s32 offset:2456
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v727*/, s32 offset:2460
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v728*/, s32 offset:2464
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v729*/, s32 offset:2468
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v730*/, s32 offset:2472
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v731*/, s32 offset:2476
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v732*/, s32 offset:2480
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v733*/, s32 offset:2484
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v734*/, s32 offset:2488
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v735*/, s32 offset:2492
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v736*/, s32 offset:2496
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v737*/, s32 offset:2500
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v738*/, s32 offset:2504
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v739*/, s32 offset:2508
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v740*/, s32 offset:2512
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v741*/, s32 offset:2516
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v679*/, s32 offset:2268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v680*/, s32 offset:2272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v681*/, s32 offset:2276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v682*/, s32 offset:2280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v683*/, s32 offset:2284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v684*/, s32 offset:2288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v685*/, s32 offset:2292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v686*/, s32 offset:2296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v687*/, s32 offset:2300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v688*/, s32 offset:2304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v689*/, s32 offset:2308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v690*/, s32 offset:2312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v691*/, s32 offset:2316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v692*/, s32 offset:2320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v693*/, s32 offset:2324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v694*/, s32 offset:2328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v695*/, s32 offset:2332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v696*/, s32 offset:2336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v697*/, s32 offset:2340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v698*/, s32 offset:2344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v699*/, s32 offset:2348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v700*/, s32 offset:2352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v701*/, s32 offset:2356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v702*/, s32 offset:2360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v703*/, s32 offset:2364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v704*/, s32 offset:2368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v705*/, s32 offset:2372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v706*/, s32 offset:2376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v707*/, s32 offset:2380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v708*/, s32 offset:2384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v709*/, s32 offset:2388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v710*/, s32 offset:2392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v711*/, s32 offset:2396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v712*/, s32 offset:2400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v713*/, s32 offset:2404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v714*/, s32 offset:2408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v715*/, s32 offset:2412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v716*/, s32 offset:2416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v717*/, s32 offset:2420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v718*/, s32 offset:2424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v719*/, s32 offset:2428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v720*/, s32 offset:2432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v721*/, s32 offset:2436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v722*/, s32 offset:2440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v723*/, s32 offset:2444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v724*/, s32 offset:2448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v725*/, s32 offset:2452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v726*/, s32 offset:2456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v727*/, s32 offset:2460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v728*/, s32 offset:2464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v729*/, s32 offset:2468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v730*/, s32 offset:2472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v731*/, s32 offset:2476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v732*/, s32 offset:2480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v733*/, s32 offset:2484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v734*/, s32 offset:2488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v735*/, s32 offset:2492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v736*/, s32 offset:2496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v737*/, s32 offset:2500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v738*/, s32 offset:2504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v739*/, s32 offset:2508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v740*/, s32 offset:2512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v741*/, s32 offset:2516 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v742*/, s32 offset:2520
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v743*/, s32 offset:2524
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v744*/, s32 offset:2528
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v745*/, s32 offset:2532
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v746*/, s32 offset:2536
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v747*/, s32 offset:2540
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v748*/, s32 offset:2544
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v749*/, s32 offset:2548
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v750*/, s32 offset:2552
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v751*/, s32 offset:2556
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v752*/, s32 offset:2560
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v753*/, s32 offset:2564
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v754*/, s32 offset:2568
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v755*/, s32 offset:2572
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v756*/, s32 offset:2576
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v757*/, s32 offset:2580
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v758*/, s32 offset:2584
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v759*/, s32 offset:2588
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v760*/, s32 offset:2592
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v761*/, s32 offset:2596
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v762*/, s32 offset:2600
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v763*/, s32 offset:2604
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v764*/, s32 offset:2608
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v765*/, s32 offset:2612
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v766*/, s32 offset:2616
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v767*/, s32 offset:2620
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v742*/, s32 offset:2520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v743*/, s32 offset:2524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v744*/, s32 offset:2528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v745*/, s32 offset:2532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v746*/, s32 offset:2536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v747*/, s32 offset:2540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v748*/, s32 offset:2544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v749*/, s32 offset:2548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v750*/, s32 offset:2552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v751*/, s32 offset:2556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v752*/, s32 offset:2560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v753*/, s32 offset:2564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v754*/, s32 offset:2568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v755*/, s32 offset:2572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v756*/, s32 offset:2576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v757*/, s32 offset:2580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v758*/, s32 offset:2584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v759*/, s32 offset:2588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v760*/, s32 offset:2592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v761*/, s32 offset:2596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v762*/, s32 offset:2600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v763*/, s32 offset:2604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v764*/, s32 offset:2608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v765*/, s32 offset:2612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v766*/, s32 offset:2616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v767*/, s32 offset:2620 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x80c ; msbs: dst=0 src0=0 src1=3 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v768*/, s32 offset:2624
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v769*/, s32 offset:2628
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v770*/, s32 offset:2632
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v771*/, s32 offset:2636
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v772*/, s32 offset:2640
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v773*/, s32 offset:2644
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v774*/, s32 offset:2648
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v775*/, s32 offset:2652
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v776*/, s32 offset:2656
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v777*/, s32 offset:2660
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v778*/, s32 offset:2664
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v779*/, s32 offset:2668
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v780*/, s32 offset:2672
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v781*/, s32 offset:2676
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v782*/, s32 offset:2680
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v783*/, s32 offset:2684
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v784*/, s32 offset:2688
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v785*/, s32 offset:2692
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v786*/, s32 offset:2696
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v787*/, s32 offset:2700
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v788*/, s32 offset:2704
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v789*/, s32 offset:2708
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v790*/, s32 offset:2712
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v791*/, s32 offset:2716
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v792*/, s32 offset:2720
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v793*/, s32 offset:2724
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v794*/, s32 offset:2728
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v795*/, s32 offset:2732
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v796*/, s32 offset:2736
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v797*/, s32 offset:2740
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v798*/, s32 offset:2744
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v799*/, s32 offset:2748
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v800*/, s32 offset:2752
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v801*/, s32 offset:2756
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v802*/, s32 offset:2760
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v803*/, s32 offset:2764
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v804*/, s32 offset:2768
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v768*/, s32 offset:2624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v769*/, s32 offset:2628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v770*/, s32 offset:2632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v771*/, s32 offset:2636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v772*/, s32 offset:2640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v773*/, s32 offset:2644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v774*/, s32 offset:2648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v775*/, s32 offset:2652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v776*/, s32 offset:2656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v777*/, s32 offset:2660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v778*/, s32 offset:2664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v779*/, s32 offset:2668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v780*/, s32 offset:2672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v781*/, s32 offset:2676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v782*/, s32 offset:2680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v783*/, s32 offset:2684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v784*/, s32 offset:2688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v785*/, s32 offset:2692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v786*/, s32 offset:2696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v787*/, s32 offset:2700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v788*/, s32 offset:2704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v789*/, s32 offset:2708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v790*/, s32 offset:2712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v791*/, s32 offset:2716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v792*/, s32 offset:2720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v793*/, s32 offset:2724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v794*/, s32 offset:2728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v795*/, s32 offset:2732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v796*/, s32 offset:2736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v797*/, s32 offset:2740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v798*/, s32 offset:2744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v799*/, s32 offset:2748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v800*/, s32 offset:2752 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v801*/, s32 offset:2756 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v802*/, s32 offset:2760 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v803*/, s32 offset:2764 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v804*/, s32 offset:2768 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v805*/, s32 offset:2772
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v806*/, s32 offset:2776
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v807*/, s32 offset:2780
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v808*/, s32 offset:2784
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v809*/, s32 offset:2788
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v810*/, s32 offset:2792
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v811*/, s32 offset:2796
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v812*/, s32 offset:2800
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v813*/, s32 offset:2804
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v814*/, s32 offset:2808
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v815*/, s32 offset:2812
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v816*/, s32 offset:2816
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v817*/, s32 offset:2820
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v818*/, s32 offset:2824
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v819*/, s32 offset:2828
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v820*/, s32 offset:2832
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v821*/, s32 offset:2836
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v822*/, s32 offset:2840
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v823*/, s32 offset:2844
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v824*/, s32 offset:2848
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v825*/, s32 offset:2852
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v826*/, s32 offset:2856
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v827*/, s32 offset:2860
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v828*/, s32 offset:2864
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v829*/, s32 offset:2868
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v830*/, s32 offset:2872
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v831*/, s32 offset:2876
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v832*/, s32 offset:2880
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v833*/, s32 offset:2884
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v834*/, s32 offset:2888
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v835*/, s32 offset:2892
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v836*/, s32 offset:2896
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v837*/, s32 offset:2900
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v838*/, s32 offset:2904
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v839*/, s32 offset:2908
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v840*/, s32 offset:2912
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v841*/, s32 offset:2916
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v842*/, s32 offset:2920
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v843*/, s32 offset:2924
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v844*/, s32 offset:2928
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v845*/, s32 offset:2932
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v846*/, s32 offset:2936
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v847*/, s32 offset:2940
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v848*/, s32 offset:2944
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v849*/, s32 offset:2948
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v850*/, s32 offset:2952
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v851*/, s32 offset:2956
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v852*/, s32 offset:2960
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v853*/, s32 offset:2964
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v854*/, s32 offset:2968
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v855*/, s32 offset:2972
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v856*/, s32 offset:2976
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v857*/, s32 offset:2980
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v858*/, s32 offset:2984
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v859*/, s32 offset:2988
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v860*/, s32 offset:2992
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v861*/, s32 offset:2996
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v862*/, s32 offset:3000
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v863*/, s32 offset:3004
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v864*/, s32 offset:3008
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v865*/, s32 offset:3012
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v866*/, s32 offset:3016
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v867*/, s32 offset:3020
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v805*/, s32 offset:2772 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v806*/, s32 offset:2776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v807*/, s32 offset:2780 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v808*/, s32 offset:2784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v809*/, s32 offset:2788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v810*/, s32 offset:2792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v811*/, s32 offset:2796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v812*/, s32 offset:2800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v813*/, s32 offset:2804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v814*/, s32 offset:2808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v815*/, s32 offset:2812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v816*/, s32 offset:2816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v817*/, s32 offset:2820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v818*/, s32 offset:2824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v819*/, s32 offset:2828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v820*/, s32 offset:2832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v821*/, s32 offset:2836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v822*/, s32 offset:2840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v823*/, s32 offset:2844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v824*/, s32 offset:2848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v825*/, s32 offset:2852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v826*/, s32 offset:2856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v827*/, s32 offset:2860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v828*/, s32 offset:2864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v829*/, s32 offset:2868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v830*/, s32 offset:2872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v831*/, s32 offset:2876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v832*/, s32 offset:2880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v833*/, s32 offset:2884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v834*/, s32 offset:2888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v835*/, s32 offset:2892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v836*/, s32 offset:2896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v837*/, s32 offset:2900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v838*/, s32 offset:2904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v839*/, s32 offset:2908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v840*/, s32 offset:2912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v841*/, s32 offset:2916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v842*/, s32 offset:2920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v843*/, s32 offset:2924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v844*/, s32 offset:2928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v845*/, s32 offset:2932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v846*/, s32 offset:2936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v847*/, s32 offset:2940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v848*/, s32 offset:2944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v849*/, s32 offset:2948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v850*/, s32 offset:2952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v851*/, s32 offset:2956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v852*/, s32 offset:2960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v853*/, s32 offset:2964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v854*/, s32 offset:2968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v855*/, s32 offset:2972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v856*/, s32 offset:2976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v857*/, s32 offset:2980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v858*/, s32 offset:2984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v859*/, s32 offset:2988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v860*/, s32 offset:2992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v861*/, s32 offset:2996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v862*/, s32 offset:3000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v863*/, s32 offset:3004 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v864*/, s32 offset:3008 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v865*/, s32 offset:3012 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v866*/, s32 offset:3016 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v867*/, s32 offset:3020 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v868*/, s32 offset:3024
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v869*/, s32 offset:3028
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v870*/, s32 offset:3032
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v871*/, s32 offset:3036
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v872*/, s32 offset:3040
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v873*/, s32 offset:3044
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v874*/, s32 offset:3048
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v875*/, s32 offset:3052
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v876*/, s32 offset:3056
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v877*/, s32 offset:3060
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v878*/, s32 offset:3064
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v879*/, s32 offset:3068
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v880*/, s32 offset:3072
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v881*/, s32 offset:3076
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v882*/, s32 offset:3080
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v883*/, s32 offset:3084
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v884*/, s32 offset:3088
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v885*/, s32 offset:3092
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v886*/, s32 offset:3096
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v887*/, s32 offset:3100
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v888*/, s32 offset:3104
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v889*/, s32 offset:3108
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v890*/, s32 offset:3112
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v891*/, s32 offset:3116
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v892*/, s32 offset:3120
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v893*/, s32 offset:3124
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v894*/, s32 offset:3128
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v895*/, s32 offset:3132
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v896*/, s32 offset:3136
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v897*/, s32 offset:3140
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v898*/, s32 offset:3144
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v899*/, s32 offset:3148
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v900*/, s32 offset:3152
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v901*/, s32 offset:3156
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v902*/, s32 offset:3160
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v903*/, s32 offset:3164
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v904*/, s32 offset:3168
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v905*/, s32 offset:3172
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v906*/, s32 offset:3176
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v907*/, s32 offset:3180
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v908*/, s32 offset:3184
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v909*/, s32 offset:3188
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v910*/, s32 offset:3192
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v911*/, s32 offset:3196
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v912*/, s32 offset:3200
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v913*/, s32 offset:3204
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v914*/, s32 offset:3208
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v915*/, s32 offset:3212
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v916*/, s32 offset:3216
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v917*/, s32 offset:3220
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v918*/, s32 offset:3224
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v919*/, s32 offset:3228
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v920*/, s32 offset:3232
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v921*/, s32 offset:3236
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v922*/, s32 offset:3240
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v923*/, s32 offset:3244
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v924*/, s32 offset:3248
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v925*/, s32 offset:3252
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v926*/, s32 offset:3256
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v927*/, s32 offset:3260
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v928*/, s32 offset:3264
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v929*/, s32 offset:3268
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v930*/, s32 offset:3272
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v868*/, s32 offset:3024 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v869*/, s32 offset:3028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v870*/, s32 offset:3032 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v871*/, s32 offset:3036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v872*/, s32 offset:3040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v873*/, s32 offset:3044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v874*/, s32 offset:3048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v875*/, s32 offset:3052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v876*/, s32 offset:3056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v877*/, s32 offset:3060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v878*/, s32 offset:3064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v879*/, s32 offset:3068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v880*/, s32 offset:3072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v881*/, s32 offset:3076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v882*/, s32 offset:3080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v883*/, s32 offset:3084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v884*/, s32 offset:3088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v885*/, s32 offset:3092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v886*/, s32 offset:3096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v887*/, s32 offset:3100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v888*/, s32 offset:3104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v889*/, s32 offset:3108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v890*/, s32 offset:3112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v891*/, s32 offset:3116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v892*/, s32 offset:3120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v893*/, s32 offset:3124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v894*/, s32 offset:3128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v895*/, s32 offset:3132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v896*/, s32 offset:3136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v897*/, s32 offset:3140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v898*/, s32 offset:3144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v899*/, s32 offset:3148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v900*/, s32 offset:3152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v901*/, s32 offset:3156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v902*/, s32 offset:3160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v903*/, s32 offset:3164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v904*/, s32 offset:3168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v905*/, s32 offset:3172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v906*/, s32 offset:3176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v907*/, s32 offset:3180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v908*/, s32 offset:3184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v909*/, s32 offset:3188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v910*/, s32 offset:3192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v911*/, s32 offset:3196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v912*/, s32 offset:3200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v913*/, s32 offset:3204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v914*/, s32 offset:3208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v915*/, s32 offset:3212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v916*/, s32 offset:3216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v917*/, s32 offset:3220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v918*/, s32 offset:3224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v919*/, s32 offset:3228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v920*/, s32 offset:3232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v921*/, s32 offset:3236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v922*/, s32 offset:3240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v923*/, s32 offset:3244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v924*/, s32 offset:3248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v925*/, s32 offset:3252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v926*/, s32 offset:3256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v927*/, s32 offset:3260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v928*/, s32 offset:3264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v929*/, s32 offset:3268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v930*/, s32 offset:3272 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v931*/, s32 offset:3276
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v932*/, s32 offset:3280
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v933*/, s32 offset:3284
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v934*/, s32 offset:3288
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v935*/, s32 offset:3292
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v936*/, s32 offset:3296
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v937*/, s32 offset:3300
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v938*/, s32 offset:3304
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v939*/, s32 offset:3308
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v940*/, s32 offset:3312
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v941*/, s32 offset:3316
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v942*/, s32 offset:3320
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v943*/, s32 offset:3324
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v944*/, s32 offset:3328
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v945*/, s32 offset:3332
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v946*/, s32 offset:3336
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v947*/, s32 offset:3340
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v948*/, s32 offset:3344
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v949*/, s32 offset:3348
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v950*/, s32 offset:3352
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v951*/, s32 offset:3356
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v952*/, s32 offset:3360
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v953*/, s32 offset:3364
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v954*/, s32 offset:3368
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v955*/, s32 offset:3372
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v956*/, s32 offset:3376
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v957*/, s32 offset:3380
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v958*/, s32 offset:3384
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v959*/, s32 offset:3388
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v960*/, s32 offset:3392
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v961*/, s32 offset:3396
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v962*/, s32 offset:3400
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v963*/, s32 offset:3404
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v964*/, s32 offset:3408
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v965*/, s32 offset:3412
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v966*/, s32 offset:3416
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v967*/, s32 offset:3420
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v968*/, s32 offset:3424
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v969*/, s32 offset:3428
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v970*/, s32 offset:3432
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v971*/, s32 offset:3436
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v972*/, s32 offset:3440
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v973*/, s32 offset:3444
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v974*/, s32 offset:3448
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v975*/, s32 offset:3452
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v976*/, s32 offset:3456
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v977*/, s32 offset:3460
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v978*/, s32 offset:3464
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v979*/, s32 offset:3468
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v980*/, s32 offset:3472
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v981*/, s32 offset:3476
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v982*/, s32 offset:3480
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v983*/, s32 offset:3484
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v984*/, s32 offset:3488
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v985*/, s32 offset:3492
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v986*/, s32 offset:3496
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v987*/, s32 offset:3500
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v988*/, s32 offset:3504
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v989*/, s32 offset:3508
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v990*/, s32 offset:3512
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v991*/, s32 offset:3516
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v992*/, s32 offset:3520
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v993*/, s32 offset:3524
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v931*/, s32 offset:3276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v932*/, s32 offset:3280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v933*/, s32 offset:3284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v934*/, s32 offset:3288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v935*/, s32 offset:3292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v936*/, s32 offset:3296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v937*/, s32 offset:3300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v938*/, s32 offset:3304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v939*/, s32 offset:3308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v940*/, s32 offset:3312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v941*/, s32 offset:3316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v942*/, s32 offset:3320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v943*/, s32 offset:3324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v944*/, s32 offset:3328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v945*/, s32 offset:3332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v946*/, s32 offset:3336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v947*/, s32 offset:3340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v948*/, s32 offset:3344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v949*/, s32 offset:3348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v950*/, s32 offset:3352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v951*/, s32 offset:3356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v952*/, s32 offset:3360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v953*/, s32 offset:3364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v954*/, s32 offset:3368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v955*/, s32 offset:3372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v956*/, s32 offset:3376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v957*/, s32 offset:3380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v958*/, s32 offset:3384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v959*/, s32 offset:3388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v960*/, s32 offset:3392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v961*/, s32 offset:3396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v962*/, s32 offset:3400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v963*/, s32 offset:3404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v964*/, s32 offset:3408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v965*/, s32 offset:3412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v966*/, s32 offset:3416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v967*/, s32 offset:3420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v968*/, s32 offset:3424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v969*/, s32 offset:3428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v970*/, s32 offset:3432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v971*/, s32 offset:3436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v972*/, s32 offset:3440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v973*/, s32 offset:3444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v974*/, s32 offset:3448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v975*/, s32 offset:3452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v976*/, s32 offset:3456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v977*/, s32 offset:3460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v978*/, s32 offset:3464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v979*/, s32 offset:3468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v980*/, s32 offset:3472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v981*/, s32 offset:3476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v982*/, s32 offset:3480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v983*/, s32 offset:3484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v984*/, s32 offset:3488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v985*/, s32 offset:3492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v986*/, s32 offset:3496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v987*/, s32 offset:3500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v988*/, s32 offset:3504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v989*/, s32 offset:3508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v990*/, s32 offset:3512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v991*/, s32 offset:3516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v992*/, s32 offset:3520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v993*/, s32 offset:3524 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x1d ; 120-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v994*/, s32 offset:3528
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v995*/, s32 offset:3532
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v996*/, s32 offset:3536
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v997*/, s32 offset:3540
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v998*/, s32 offset:3544
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v999*/, s32 offset:3548
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v1000*/, s32 offset:3552
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v1001*/, s32 offset:3556
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v1002*/, s32 offset:3560
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v1003*/, s32 offset:3564
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v1004*/, s32 offset:3568
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v1005*/, s32 offset:3572
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v1006*/, s32 offset:3576
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v1007*/, s32 offset:3580
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v1008*/, s32 offset:3584
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v1009*/, s32 offset:3588
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v1010*/, s32 offset:3592
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v1011*/, s32 offset:3596
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v1012*/, s32 offset:3600
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v1013*/, s32 offset:3604
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v1014*/, s32 offset:3608
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v1015*/, s32 offset:3612
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v1016*/, s32 offset:3616
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v1017*/, s32 offset:3620
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v1018*/, s32 offset:3624
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v1019*/, s32 offset:3628
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v1020*/, s32 offset:3632
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v1021*/, s32 offset:3636
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v1022*/, s32 offset:3640
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v1023*/, s32 offset:3644
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v994*/, s32 offset:3528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v995*/, s32 offset:3532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v996*/, s32 offset:3536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v997*/, s32 offset:3540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v998*/, s32 offset:3544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v999*/, s32 offset:3548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v1000*/, s32 offset:3552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v1001*/, s32 offset:3556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v1002*/, s32 offset:3560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v1003*/, s32 offset:3564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v1004*/, s32 offset:3568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v1005*/, s32 offset:3572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v1006*/, s32 offset:3576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v1007*/, s32 offset:3580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v1008*/, s32 offset:3584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v1009*/, s32 offset:3588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v1010*/, s32 offset:3592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v1011*/, s32 offset:3596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v1012*/, s32 offset:3600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v1013*/, s32 offset:3604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v1014*/, s32 offset:3608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v1015*/, s32 offset:3612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v1016*/, s32 offset:3616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v1017*/, s32 offset:3620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v1018*/, s32 offset:3624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v1019*/, s32 offset:3628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v1020*/, s32 offset:3632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v1021*/, s32 offset:3636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v1022*/, s32 offset:3640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v1023*/, s32 offset:3644 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
 ; GFX1250-DAGISEL-NEXT:    v_mov_b32_e32 v2, v0
@@ -6876,935 +6876,935 @@ define amdgpu_gfx_whole_wave <2 x half> @tail_call_gfx_from_whole_wave(i1 %activ
 ; GFX1250-DAGISEL-NEXT:    v_swap_b32 v0, v1
 ; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, s0, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2, off, s32 offset:8
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3, off, s32 offset:12
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4, off, s32 offset:16
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5, off, s32 offset:20
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6, off, s32 offset:24
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7, off, s32 offset:28
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8, off, s32 offset:32
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9, off, s32 offset:36
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10, off, s32 offset:40
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11, off, s32 offset:44
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12, off, s32 offset:48
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13, off, s32 offset:52
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14, off, s32 offset:56
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15, off, s32 offset:60
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16, off, s32 offset:64
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17, off, s32 offset:68
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18, off, s32 offset:72
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19, off, s32 offset:76
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20, off, s32 offset:80
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21, off, s32 offset:84
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22, off, s32 offset:88
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23, off, s32 offset:92
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24, off, s32 offset:96
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25, off, s32 offset:100
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26, off, s32 offset:104
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27, off, s32 offset:108
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28, off, s32 offset:112
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29, off, s32 offset:116
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30, off, s32 offset:120
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31, off, s32 offset:124
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32, off, s32 offset:128
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33, off, s32 offset:132
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34, off, s32 offset:136
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35, off, s32 offset:140
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36, off, s32 offset:144
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37, off, s32 offset:148
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38, off, s32 offset:152
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39, off, s32 offset:156
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48, off, s32 offset:160
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49, off, s32 offset:164
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50, off, s32 offset:168
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51, off, s32 offset:172
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52, off, s32 offset:176
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53, off, s32 offset:180
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54, off, s32 offset:184
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55, off, s32 offset:188
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64, off, s32 offset:192
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65, off, s32 offset:196
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66, off, s32 offset:200
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67, off, s32 offset:204
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68, off, s32 offset:208
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69, off, s32 offset:212
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70, off, s32 offset:216
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71, off, s32 offset:220
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80, off, s32 offset:224
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81, off, s32 offset:228
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82, off, s32 offset:232
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83, off, s32 offset:236
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84, off, s32 offset:240
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85, off, s32 offset:244
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86, off, s32 offset:248
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s32 offset:4 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2, off, s32 offset:8 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3, off, s32 offset:12 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4, off, s32 offset:16 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5, off, s32 offset:20 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6, off, s32 offset:24 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7, off, s32 offset:28 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8, off, s32 offset:32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9, off, s32 offset:36 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10, off, s32 offset:40 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11, off, s32 offset:44 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12, off, s32 offset:48 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13, off, s32 offset:52 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14, off, s32 offset:56 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15, off, s32 offset:60 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16, off, s32 offset:64 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17, off, s32 offset:68 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18, off, s32 offset:72 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19, off, s32 offset:76 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20, off, s32 offset:80 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21, off, s32 offset:84 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22, off, s32 offset:88 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23, off, s32 offset:92 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24, off, s32 offset:96 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25, off, s32 offset:100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26, off, s32 offset:104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27, off, s32 offset:108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28, off, s32 offset:112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29, off, s32 offset:116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30, off, s32 offset:120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31, off, s32 offset:124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32, off, s32 offset:128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33, off, s32 offset:132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34, off, s32 offset:136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35, off, s32 offset:140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36, off, s32 offset:144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37, off, s32 offset:148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38, off, s32 offset:152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39, off, s32 offset:156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48, off, s32 offset:160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49, off, s32 offset:164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50, off, s32 offset:168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51, off, s32 offset:172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52, off, s32 offset:176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53, off, s32 offset:180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54, off, s32 offset:184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55, off, s32 offset:188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64, off, s32 offset:192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65, off, s32 offset:196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66, off, s32 offset:200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67, off, s32 offset:204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68, off, s32 offset:208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69, off, s32 offset:212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70, off, s32 offset:216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71, off, s32 offset:220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80, off, s32 offset:224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81, off, s32 offset:228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82, off, s32 offset:232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83, off, s32 offset:236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84, off, s32 offset:240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85, off, s32 offset:244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86, off, s32 offset:248 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87, off, s32 offset:252
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96, off, s32 offset:256
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97, off, s32 offset:260
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98, off, s32 offset:264
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99, off, s32 offset:268
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100, off, s32 offset:272
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101, off, s32 offset:276
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102, off, s32 offset:280
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103, off, s32 offset:284
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112, off, s32 offset:288
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113, off, s32 offset:292
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114, off, s32 offset:296
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115, off, s32 offset:300
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116, off, s32 offset:304
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117, off, s32 offset:308
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118, off, s32 offset:312
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119, off, s32 offset:316
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128, off, s32 offset:320
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129, off, s32 offset:324
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130, off, s32 offset:328
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131, off, s32 offset:332
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132, off, s32 offset:336
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133, off, s32 offset:340
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134, off, s32 offset:344
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135, off, s32 offset:348
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144, off, s32 offset:352
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145, off, s32 offset:356
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146, off, s32 offset:360
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147, off, s32 offset:364
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148, off, s32 offset:368
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149, off, s32 offset:372
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150, off, s32 offset:376
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151, off, s32 offset:380
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160, off, s32 offset:384
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161, off, s32 offset:388
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162, off, s32 offset:392
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163, off, s32 offset:396
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164, off, s32 offset:400
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165, off, s32 offset:404
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166, off, s32 offset:408
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167, off, s32 offset:412
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176, off, s32 offset:416
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177, off, s32 offset:420
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178, off, s32 offset:424
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179, off, s32 offset:428
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180, off, s32 offset:432
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181, off, s32 offset:436
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182, off, s32 offset:440
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183, off, s32 offset:444
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192, off, s32 offset:448
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193, off, s32 offset:452
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194, off, s32 offset:456
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195, off, s32 offset:460
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196, off, s32 offset:464
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197, off, s32 offset:468
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198, off, s32 offset:472
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199, off, s32 offset:476
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208, off, s32 offset:480
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209, off, s32 offset:484
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210, off, s32 offset:488
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211, off, s32 offset:492
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212, off, s32 offset:496
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213, off, s32 offset:500
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87, off, s32 offset:252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96, off, s32 offset:256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97, off, s32 offset:260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98, off, s32 offset:264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99, off, s32 offset:268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100, off, s32 offset:272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101, off, s32 offset:276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102, off, s32 offset:280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103, off, s32 offset:284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112, off, s32 offset:288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113, off, s32 offset:292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114, off, s32 offset:296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115, off, s32 offset:300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116, off, s32 offset:304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117, off, s32 offset:308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118, off, s32 offset:312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119, off, s32 offset:316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128, off, s32 offset:320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129, off, s32 offset:324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130, off, s32 offset:328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131, off, s32 offset:332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132, off, s32 offset:336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133, off, s32 offset:340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134, off, s32 offset:344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135, off, s32 offset:348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144, off, s32 offset:352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145, off, s32 offset:356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146, off, s32 offset:360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147, off, s32 offset:364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148, off, s32 offset:368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149, off, s32 offset:372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150, off, s32 offset:376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151, off, s32 offset:380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160, off, s32 offset:384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161, off, s32 offset:388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162, off, s32 offset:392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163, off, s32 offset:396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164, off, s32 offset:400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165, off, s32 offset:404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166, off, s32 offset:408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167, off, s32 offset:412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176, off, s32 offset:416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177, off, s32 offset:420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178, off, s32 offset:424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179, off, s32 offset:428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180, off, s32 offset:432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181, off, s32 offset:436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182, off, s32 offset:440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183, off, s32 offset:444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192, off, s32 offset:448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193, off, s32 offset:452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194, off, s32 offset:456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195, off, s32 offset:460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196, off, s32 offset:464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197, off, s32 offset:468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198, off, s32 offset:472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199, off, s32 offset:476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208, off, s32 offset:480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209, off, s32 offset:484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210, off, s32 offset:488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211, off, s32 offset:492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212, off, s32 offset:496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213, off, s32 offset:500 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214, off, s32 offset:504
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215, off, s32 offset:508
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224, off, s32 offset:512
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225, off, s32 offset:516
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226, off, s32 offset:520
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227, off, s32 offset:524
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228, off, s32 offset:528
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229, off, s32 offset:532
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230, off, s32 offset:536
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231, off, s32 offset:540
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240, off, s32 offset:544
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241, off, s32 offset:548
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242, off, s32 offset:552
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243, off, s32 offset:556
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244, off, s32 offset:560
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245, off, s32 offset:564
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246, off, s32 offset:568
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247, off, s32 offset:572
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214, off, s32 offset:504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215, off, s32 offset:508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224, off, s32 offset:512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225, off, s32 offset:516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226, off, s32 offset:520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227, off, s32 offset:524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228, off, s32 offset:528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229, off, s32 offset:532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230, off, s32 offset:536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231, off, s32 offset:540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240, off, s32 offset:544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241, off, s32 offset:548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242, off, s32 offset:552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243, off, s32 offset:556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244, off, s32 offset:560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245, off, s32 offset:564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246, off, s32 offset:568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247, off, s32 offset:572 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 64 ; msbs: dst=1 src0=0 src1=0 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v256*/, off, s32 offset:576
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v257*/, off, s32 offset:580
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v258*/, off, s32 offset:584
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v259*/, off, s32 offset:588
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v260*/, off, s32 offset:592
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v261*/, off, s32 offset:596
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v262*/, off, s32 offset:600
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v263*/, off, s32 offset:604
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v264*/, off, s32 offset:608
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v265*/, off, s32 offset:612
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v266*/, off, s32 offset:616
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v267*/, off, s32 offset:620
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v268*/, off, s32 offset:624
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v269*/, off, s32 offset:628
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v270*/, off, s32 offset:632
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v271*/, off, s32 offset:636
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v272*/, off, s32 offset:640
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v273*/, off, s32 offset:644
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v274*/, off, s32 offset:648
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v275*/, off, s32 offset:652
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v276*/, off, s32 offset:656
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v277*/, off, s32 offset:660
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v278*/, off, s32 offset:664
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v279*/, off, s32 offset:668
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v280*/, off, s32 offset:672
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v281*/, off, s32 offset:676
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v282*/, off, s32 offset:680
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v283*/, off, s32 offset:684
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v284*/, off, s32 offset:688
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v285*/, off, s32 offset:692
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v286*/, off, s32 offset:696
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v287*/, off, s32 offset:700
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v288*/, off, s32 offset:704
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v289*/, off, s32 offset:708
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v290*/, off, s32 offset:712
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v291*/, off, s32 offset:716
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v292*/, off, s32 offset:720
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v293*/, off, s32 offset:724
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v294*/, off, s32 offset:728
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v295*/, off, s32 offset:732
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v296*/, off, s32 offset:736
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v297*/, off, s32 offset:740
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v298*/, off, s32 offset:744
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v299*/, off, s32 offset:748
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v300*/, off, s32 offset:752
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v256*/, off, s32 offset:576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v257*/, off, s32 offset:580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v258*/, off, s32 offset:584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v259*/, off, s32 offset:588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v260*/, off, s32 offset:592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v261*/, off, s32 offset:596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v262*/, off, s32 offset:600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v263*/, off, s32 offset:604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v264*/, off, s32 offset:608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v265*/, off, s32 offset:612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v266*/, off, s32 offset:616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v267*/, off, s32 offset:620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v268*/, off, s32 offset:624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v269*/, off, s32 offset:628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v270*/, off, s32 offset:632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v271*/, off, s32 offset:636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v272*/, off, s32 offset:640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v273*/, off, s32 offset:644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v274*/, off, s32 offset:648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v275*/, off, s32 offset:652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v276*/, off, s32 offset:656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v277*/, off, s32 offset:660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v278*/, off, s32 offset:664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v279*/, off, s32 offset:668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v280*/, off, s32 offset:672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v281*/, off, s32 offset:676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v282*/, off, s32 offset:680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v283*/, off, s32 offset:684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v284*/, off, s32 offset:688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v285*/, off, s32 offset:692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v286*/, off, s32 offset:696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v287*/, off, s32 offset:700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v288*/, off, s32 offset:704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v289*/, off, s32 offset:708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v290*/, off, s32 offset:712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v291*/, off, s32 offset:716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v292*/, off, s32 offset:720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v293*/, off, s32 offset:724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v294*/, off, s32 offset:728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v295*/, off, s32 offset:732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v296*/, off, s32 offset:736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v297*/, off, s32 offset:740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v298*/, off, s32 offset:744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v299*/, off, s32 offset:748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v300*/, off, s32 offset:752 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v301*/, off, s32 offset:756
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v302*/, off, s32 offset:760
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v303*/, off, s32 offset:764
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v304*/, off, s32 offset:768
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v305*/, off, s32 offset:772
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v306*/, off, s32 offset:776
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v307*/, off, s32 offset:780
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v308*/, off, s32 offset:784
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v309*/, off, s32 offset:788
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v310*/, off, s32 offset:792
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v311*/, off, s32 offset:796
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v312*/, off, s32 offset:800
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v313*/, off, s32 offset:804
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v314*/, off, s32 offset:808
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v315*/, off, s32 offset:812
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v316*/, off, s32 offset:816
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v317*/, off, s32 offset:820
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v318*/, off, s32 offset:824
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v319*/, off, s32 offset:828
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v320*/, off, s32 offset:832
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v321*/, off, s32 offset:836
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v322*/, off, s32 offset:840
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v323*/, off, s32 offset:844
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v324*/, off, s32 offset:848
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v325*/, off, s32 offset:852
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v326*/, off, s32 offset:856
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v327*/, off, s32 offset:860
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v328*/, off, s32 offset:864
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v329*/, off, s32 offset:868
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v330*/, off, s32 offset:872
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v331*/, off, s32 offset:876
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v332*/, off, s32 offset:880
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v333*/, off, s32 offset:884
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v334*/, off, s32 offset:888
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v335*/, off, s32 offset:892
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v336*/, off, s32 offset:896
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v337*/, off, s32 offset:900
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v338*/, off, s32 offset:904
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v339*/, off, s32 offset:908
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v340*/, off, s32 offset:912
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v341*/, off, s32 offset:916
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v342*/, off, s32 offset:920
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v343*/, off, s32 offset:924
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v344*/, off, s32 offset:928
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v345*/, off, s32 offset:932
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v346*/, off, s32 offset:936
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v347*/, off, s32 offset:940
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v348*/, off, s32 offset:944
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v349*/, off, s32 offset:948
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v350*/, off, s32 offset:952
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v351*/, off, s32 offset:956
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v352*/, off, s32 offset:960
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v353*/, off, s32 offset:964
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v354*/, off, s32 offset:968
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v355*/, off, s32 offset:972
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v356*/, off, s32 offset:976
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v357*/, off, s32 offset:980
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v358*/, off, s32 offset:984
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v359*/, off, s32 offset:988
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v360*/, off, s32 offset:992
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v361*/, off, s32 offset:996
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v362*/, off, s32 offset:1000
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v363*/, off, s32 offset:1004
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v301*/, off, s32 offset:756 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v302*/, off, s32 offset:760 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v303*/, off, s32 offset:764 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v304*/, off, s32 offset:768 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v305*/, off, s32 offset:772 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v306*/, off, s32 offset:776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v307*/, off, s32 offset:780 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v308*/, off, s32 offset:784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v309*/, off, s32 offset:788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v310*/, off, s32 offset:792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v311*/, off, s32 offset:796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v312*/, off, s32 offset:800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v313*/, off, s32 offset:804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v314*/, off, s32 offset:808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v315*/, off, s32 offset:812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v316*/, off, s32 offset:816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v317*/, off, s32 offset:820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v318*/, off, s32 offset:824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v319*/, off, s32 offset:828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v320*/, off, s32 offset:832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v321*/, off, s32 offset:836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v322*/, off, s32 offset:840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v323*/, off, s32 offset:844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v324*/, off, s32 offset:848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v325*/, off, s32 offset:852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v326*/, off, s32 offset:856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v327*/, off, s32 offset:860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v328*/, off, s32 offset:864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v329*/, off, s32 offset:868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v330*/, off, s32 offset:872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v331*/, off, s32 offset:876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v332*/, off, s32 offset:880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v333*/, off, s32 offset:884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v334*/, off, s32 offset:888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v335*/, off, s32 offset:892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v336*/, off, s32 offset:896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v337*/, off, s32 offset:900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v338*/, off, s32 offset:904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v339*/, off, s32 offset:908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v340*/, off, s32 offset:912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v341*/, off, s32 offset:916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v342*/, off, s32 offset:920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v343*/, off, s32 offset:924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v344*/, off, s32 offset:928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v345*/, off, s32 offset:932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v346*/, off, s32 offset:936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v347*/, off, s32 offset:940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v348*/, off, s32 offset:944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v349*/, off, s32 offset:948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v350*/, off, s32 offset:952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v351*/, off, s32 offset:956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v352*/, off, s32 offset:960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v353*/, off, s32 offset:964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v354*/, off, s32 offset:968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v355*/, off, s32 offset:972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v356*/, off, s32 offset:976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v357*/, off, s32 offset:980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v358*/, off, s32 offset:984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v359*/, off, s32 offset:988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v360*/, off, s32 offset:992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v361*/, off, s32 offset:996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v362*/, off, s32 offset:1000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v363*/, off, s32 offset:1004 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v364*/, off, s32 offset:1008
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v365*/, off, s32 offset:1012
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v366*/, off, s32 offset:1016
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v367*/, off, s32 offset:1020
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v368*/, off, s32 offset:1024
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v369*/, off, s32 offset:1028
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v370*/, off, s32 offset:1032
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v371*/, off, s32 offset:1036
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v372*/, off, s32 offset:1040
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v373*/, off, s32 offset:1044
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v374*/, off, s32 offset:1048
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v375*/, off, s32 offset:1052
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v376*/, off, s32 offset:1056
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v377*/, off, s32 offset:1060
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v378*/, off, s32 offset:1064
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v379*/, off, s32 offset:1068
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v380*/, off, s32 offset:1072
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v381*/, off, s32 offset:1076
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v382*/, off, s32 offset:1080
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v383*/, off, s32 offset:1084
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v384*/, off, s32 offset:1088
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v385*/, off, s32 offset:1092
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v386*/, off, s32 offset:1096
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v387*/, off, s32 offset:1100
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v388*/, off, s32 offset:1104
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v389*/, off, s32 offset:1108
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v390*/, off, s32 offset:1112
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v391*/, off, s32 offset:1116
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v392*/, off, s32 offset:1120
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v393*/, off, s32 offset:1124
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v394*/, off, s32 offset:1128
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v395*/, off, s32 offset:1132
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v396*/, off, s32 offset:1136
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v397*/, off, s32 offset:1140
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v398*/, off, s32 offset:1144
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v399*/, off, s32 offset:1148
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v400*/, off, s32 offset:1152
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v401*/, off, s32 offset:1156
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v402*/, off, s32 offset:1160
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v403*/, off, s32 offset:1164
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v404*/, off, s32 offset:1168
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v405*/, off, s32 offset:1172
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v406*/, off, s32 offset:1176
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v407*/, off, s32 offset:1180
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v408*/, off, s32 offset:1184
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v409*/, off, s32 offset:1188
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v410*/, off, s32 offset:1192
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v411*/, off, s32 offset:1196
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v412*/, off, s32 offset:1200
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v413*/, off, s32 offset:1204
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v414*/, off, s32 offset:1208
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v415*/, off, s32 offset:1212
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v416*/, off, s32 offset:1216
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v417*/, off, s32 offset:1220
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v418*/, off, s32 offset:1224
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v419*/, off, s32 offset:1228
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v420*/, off, s32 offset:1232
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v421*/, off, s32 offset:1236
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v422*/, off, s32 offset:1240
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v423*/, off, s32 offset:1244
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v424*/, off, s32 offset:1248
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v425*/, off, s32 offset:1252
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v426*/, off, s32 offset:1256
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v364*/, off, s32 offset:1008 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v365*/, off, s32 offset:1012 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v366*/, off, s32 offset:1016 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v367*/, off, s32 offset:1020 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v368*/, off, s32 offset:1024 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v369*/, off, s32 offset:1028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v370*/, off, s32 offset:1032 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v371*/, off, s32 offset:1036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v372*/, off, s32 offset:1040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v373*/, off, s32 offset:1044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v374*/, off, s32 offset:1048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v375*/, off, s32 offset:1052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v376*/, off, s32 offset:1056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v377*/, off, s32 offset:1060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v378*/, off, s32 offset:1064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v379*/, off, s32 offset:1068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v380*/, off, s32 offset:1072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v381*/, off, s32 offset:1076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v382*/, off, s32 offset:1080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v383*/, off, s32 offset:1084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v384*/, off, s32 offset:1088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v385*/, off, s32 offset:1092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v386*/, off, s32 offset:1096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v387*/, off, s32 offset:1100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v388*/, off, s32 offset:1104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v389*/, off, s32 offset:1108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v390*/, off, s32 offset:1112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v391*/, off, s32 offset:1116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v392*/, off, s32 offset:1120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v393*/, off, s32 offset:1124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v394*/, off, s32 offset:1128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v395*/, off, s32 offset:1132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v396*/, off, s32 offset:1136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v397*/, off, s32 offset:1140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v398*/, off, s32 offset:1144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v399*/, off, s32 offset:1148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v400*/, off, s32 offset:1152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v401*/, off, s32 offset:1156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v402*/, off, s32 offset:1160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v403*/, off, s32 offset:1164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v404*/, off, s32 offset:1168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v405*/, off, s32 offset:1172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v406*/, off, s32 offset:1176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v407*/, off, s32 offset:1180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v408*/, off, s32 offset:1184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v409*/, off, s32 offset:1188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v410*/, off, s32 offset:1192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v411*/, off, s32 offset:1196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v412*/, off, s32 offset:1200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v413*/, off, s32 offset:1204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v414*/, off, s32 offset:1208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v415*/, off, s32 offset:1212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v416*/, off, s32 offset:1216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v417*/, off, s32 offset:1220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v418*/, off, s32 offset:1224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v419*/, off, s32 offset:1228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v420*/, off, s32 offset:1232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v421*/, off, s32 offset:1236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v422*/, off, s32 offset:1240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v423*/, off, s32 offset:1244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v424*/, off, s32 offset:1248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v425*/, off, s32 offset:1252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v426*/, off, s32 offset:1256 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v427*/, off, s32 offset:1260
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v428*/, off, s32 offset:1264
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v429*/, off, s32 offset:1268
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v430*/, off, s32 offset:1272
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v431*/, off, s32 offset:1276
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v432*/, off, s32 offset:1280
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v433*/, off, s32 offset:1284
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v434*/, off, s32 offset:1288
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v435*/, off, s32 offset:1292
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v436*/, off, s32 offset:1296
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v437*/, off, s32 offset:1300
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v438*/, off, s32 offset:1304
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v439*/, off, s32 offset:1308
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v440*/, off, s32 offset:1312
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v441*/, off, s32 offset:1316
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v442*/, off, s32 offset:1320
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v443*/, off, s32 offset:1324
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v444*/, off, s32 offset:1328
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v445*/, off, s32 offset:1332
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v446*/, off, s32 offset:1336
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v447*/, off, s32 offset:1340
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v448*/, off, s32 offset:1344
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v449*/, off, s32 offset:1348
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v450*/, off, s32 offset:1352
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v451*/, off, s32 offset:1356
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v452*/, off, s32 offset:1360
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v453*/, off, s32 offset:1364
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v454*/, off, s32 offset:1368
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v455*/, off, s32 offset:1372
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v456*/, off, s32 offset:1376
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v457*/, off, s32 offset:1380
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v458*/, off, s32 offset:1384
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v459*/, off, s32 offset:1388
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v460*/, off, s32 offset:1392
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v461*/, off, s32 offset:1396
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v462*/, off, s32 offset:1400
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v463*/, off, s32 offset:1404
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v464*/, off, s32 offset:1408
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v465*/, off, s32 offset:1412
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v466*/, off, s32 offset:1416
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v467*/, off, s32 offset:1420
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v468*/, off, s32 offset:1424
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v469*/, off, s32 offset:1428
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v470*/, off, s32 offset:1432
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v471*/, off, s32 offset:1436
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v472*/, off, s32 offset:1440
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v473*/, off, s32 offset:1444
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v474*/, off, s32 offset:1448
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v475*/, off, s32 offset:1452
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v476*/, off, s32 offset:1456
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v477*/, off, s32 offset:1460
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v478*/, off, s32 offset:1464
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v479*/, off, s32 offset:1468
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v480*/, off, s32 offset:1472
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v481*/, off, s32 offset:1476
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v482*/, off, s32 offset:1480
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v483*/, off, s32 offset:1484
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v484*/, off, s32 offset:1488
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v485*/, off, s32 offset:1492
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v486*/, off, s32 offset:1496
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v487*/, off, s32 offset:1500
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v488*/, off, s32 offset:1504
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v489*/, off, s32 offset:1508
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v427*/, off, s32 offset:1260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v428*/, off, s32 offset:1264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v429*/, off, s32 offset:1268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v430*/, off, s32 offset:1272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v431*/, off, s32 offset:1276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v432*/, off, s32 offset:1280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v433*/, off, s32 offset:1284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v434*/, off, s32 offset:1288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v435*/, off, s32 offset:1292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v436*/, off, s32 offset:1296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v437*/, off, s32 offset:1300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v438*/, off, s32 offset:1304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v439*/, off, s32 offset:1308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v440*/, off, s32 offset:1312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v441*/, off, s32 offset:1316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v442*/, off, s32 offset:1320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v443*/, off, s32 offset:1324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v444*/, off, s32 offset:1328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v445*/, off, s32 offset:1332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v446*/, off, s32 offset:1336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v447*/, off, s32 offset:1340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v448*/, off, s32 offset:1344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v449*/, off, s32 offset:1348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v450*/, off, s32 offset:1352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v451*/, off, s32 offset:1356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v452*/, off, s32 offset:1360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v453*/, off, s32 offset:1364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v454*/, off, s32 offset:1368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v455*/, off, s32 offset:1372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v456*/, off, s32 offset:1376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v457*/, off, s32 offset:1380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v458*/, off, s32 offset:1384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v459*/, off, s32 offset:1388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v460*/, off, s32 offset:1392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v461*/, off, s32 offset:1396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v462*/, off, s32 offset:1400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v463*/, off, s32 offset:1404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v464*/, off, s32 offset:1408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v465*/, off, s32 offset:1412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v466*/, off, s32 offset:1416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v467*/, off, s32 offset:1420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v468*/, off, s32 offset:1424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v469*/, off, s32 offset:1428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v470*/, off, s32 offset:1432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v471*/, off, s32 offset:1436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v472*/, off, s32 offset:1440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v473*/, off, s32 offset:1444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v474*/, off, s32 offset:1448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v475*/, off, s32 offset:1452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v476*/, off, s32 offset:1456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v477*/, off, s32 offset:1460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v478*/, off, s32 offset:1464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v479*/, off, s32 offset:1468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v480*/, off, s32 offset:1472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v481*/, off, s32 offset:1476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v482*/, off, s32 offset:1480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v483*/, off, s32 offset:1484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v484*/, off, s32 offset:1488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v485*/, off, s32 offset:1492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v486*/, off, s32 offset:1496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v487*/, off, s32 offset:1500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v488*/, off, s32 offset:1504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v489*/, off, s32 offset:1508 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v490*/, off, s32 offset:1512
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v491*/, off, s32 offset:1516
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v492*/, off, s32 offset:1520
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v493*/, off, s32 offset:1524
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v494*/, off, s32 offset:1528
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v495*/, off, s32 offset:1532
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v496*/, off, s32 offset:1536
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v497*/, off, s32 offset:1540
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v498*/, off, s32 offset:1544
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v499*/, off, s32 offset:1548
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v500*/, off, s32 offset:1552
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v501*/, off, s32 offset:1556
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v502*/, off, s32 offset:1560
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v503*/, off, s32 offset:1564
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v504*/, off, s32 offset:1568
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v505*/, off, s32 offset:1572
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v506*/, off, s32 offset:1576
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v507*/, off, s32 offset:1580
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v508*/, off, s32 offset:1584
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v509*/, off, s32 offset:1588
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v510*/, off, s32 offset:1592
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v511*/, off, s32 offset:1596
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v490*/, off, s32 offset:1512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v491*/, off, s32 offset:1516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v492*/, off, s32 offset:1520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v493*/, off, s32 offset:1524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v494*/, off, s32 offset:1528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v495*/, off, s32 offset:1532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v496*/, off, s32 offset:1536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v497*/, off, s32 offset:1540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v498*/, off, s32 offset:1544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v499*/, off, s32 offset:1548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v500*/, off, s32 offset:1552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v501*/, off, s32 offset:1556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v502*/, off, s32 offset:1560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v503*/, off, s32 offset:1564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v504*/, off, s32 offset:1568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v505*/, off, s32 offset:1572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v506*/, off, s32 offset:1576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v507*/, off, s32 offset:1580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v508*/, off, s32 offset:1584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v509*/, off, s32 offset:1588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v510*/, off, s32 offset:1592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v511*/, off, s32 offset:1596 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x4080 ; msbs: dst=2 src0=0 src1=0 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v512*/, off, s32 offset:1600
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v513*/, off, s32 offset:1604
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v514*/, off, s32 offset:1608
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v515*/, off, s32 offset:1612
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v516*/, off, s32 offset:1616
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v517*/, off, s32 offset:1620
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v518*/, off, s32 offset:1624
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v519*/, off, s32 offset:1628
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v520*/, off, s32 offset:1632
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v521*/, off, s32 offset:1636
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v522*/, off, s32 offset:1640
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v523*/, off, s32 offset:1644
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v524*/, off, s32 offset:1648
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v525*/, off, s32 offset:1652
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v526*/, off, s32 offset:1656
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v527*/, off, s32 offset:1660
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v528*/, off, s32 offset:1664
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v529*/, off, s32 offset:1668
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v530*/, off, s32 offset:1672
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v531*/, off, s32 offset:1676
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v532*/, off, s32 offset:1680
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v533*/, off, s32 offset:1684
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v534*/, off, s32 offset:1688
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v535*/, off, s32 offset:1692
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v536*/, off, s32 offset:1696
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v537*/, off, s32 offset:1700
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v538*/, off, s32 offset:1704
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v539*/, off, s32 offset:1708
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v540*/, off, s32 offset:1712
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v541*/, off, s32 offset:1716
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v542*/, off, s32 offset:1720
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v543*/, off, s32 offset:1724
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v544*/, off, s32 offset:1728
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v545*/, off, s32 offset:1732
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v546*/, off, s32 offset:1736
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v547*/, off, s32 offset:1740
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v548*/, off, s32 offset:1744
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v549*/, off, s32 offset:1748
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v550*/, off, s32 offset:1752
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v551*/, off, s32 offset:1756
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v552*/, off, s32 offset:1760
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v512*/, off, s32 offset:1600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v513*/, off, s32 offset:1604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v514*/, off, s32 offset:1608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v515*/, off, s32 offset:1612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v516*/, off, s32 offset:1616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v517*/, off, s32 offset:1620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v518*/, off, s32 offset:1624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v519*/, off, s32 offset:1628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v520*/, off, s32 offset:1632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v521*/, off, s32 offset:1636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v522*/, off, s32 offset:1640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v523*/, off, s32 offset:1644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v524*/, off, s32 offset:1648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v525*/, off, s32 offset:1652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v526*/, off, s32 offset:1656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v527*/, off, s32 offset:1660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v528*/, off, s32 offset:1664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v529*/, off, s32 offset:1668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v530*/, off, s32 offset:1672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v531*/, off, s32 offset:1676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v532*/, off, s32 offset:1680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v533*/, off, s32 offset:1684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v534*/, off, s32 offset:1688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v535*/, off, s32 offset:1692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v536*/, off, s32 offset:1696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v537*/, off, s32 offset:1700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v538*/, off, s32 offset:1704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v539*/, off, s32 offset:1708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v540*/, off, s32 offset:1712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v541*/, off, s32 offset:1716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v542*/, off, s32 offset:1720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v543*/, off, s32 offset:1724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v544*/, off, s32 offset:1728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v545*/, off, s32 offset:1732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v546*/, off, s32 offset:1736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v547*/, off, s32 offset:1740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v548*/, off, s32 offset:1744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v549*/, off, s32 offset:1748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v550*/, off, s32 offset:1752 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v551*/, off, s32 offset:1756 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v552*/, off, s32 offset:1760 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v553*/, off, s32 offset:1764
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v554*/, off, s32 offset:1768
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v555*/, off, s32 offset:1772
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v556*/, off, s32 offset:1776
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v557*/, off, s32 offset:1780
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v558*/, off, s32 offset:1784
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v559*/, off, s32 offset:1788
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v560*/, off, s32 offset:1792
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v561*/, off, s32 offset:1796
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v562*/, off, s32 offset:1800
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v563*/, off, s32 offset:1804
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v564*/, off, s32 offset:1808
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v565*/, off, s32 offset:1812
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v566*/, off, s32 offset:1816
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v567*/, off, s32 offset:1820
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v568*/, off, s32 offset:1824
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v569*/, off, s32 offset:1828
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v570*/, off, s32 offset:1832
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v571*/, off, s32 offset:1836
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v572*/, off, s32 offset:1840
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v573*/, off, s32 offset:1844
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v574*/, off, s32 offset:1848
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v575*/, off, s32 offset:1852
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v576*/, off, s32 offset:1856
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v577*/, off, s32 offset:1860
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v578*/, off, s32 offset:1864
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v579*/, off, s32 offset:1868
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v580*/, off, s32 offset:1872
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v581*/, off, s32 offset:1876
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v582*/, off, s32 offset:1880
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v583*/, off, s32 offset:1884
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v584*/, off, s32 offset:1888
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v585*/, off, s32 offset:1892
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v586*/, off, s32 offset:1896
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v587*/, off, s32 offset:1900
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v588*/, off, s32 offset:1904
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v589*/, off, s32 offset:1908
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v590*/, off, s32 offset:1912
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v591*/, off, s32 offset:1916
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v592*/, off, s32 offset:1920
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v593*/, off, s32 offset:1924
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v594*/, off, s32 offset:1928
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v595*/, off, s32 offset:1932
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v596*/, off, s32 offset:1936
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v597*/, off, s32 offset:1940
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v598*/, off, s32 offset:1944
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v599*/, off, s32 offset:1948
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v600*/, off, s32 offset:1952
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v601*/, off, s32 offset:1956
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v602*/, off, s32 offset:1960
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v603*/, off, s32 offset:1964
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v604*/, off, s32 offset:1968
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v605*/, off, s32 offset:1972
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v606*/, off, s32 offset:1976
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v607*/, off, s32 offset:1980
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v608*/, off, s32 offset:1984
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v609*/, off, s32 offset:1988
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v610*/, off, s32 offset:1992
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v611*/, off, s32 offset:1996
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v612*/, off, s32 offset:2000
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v613*/, off, s32 offset:2004
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v614*/, off, s32 offset:2008
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v615*/, off, s32 offset:2012
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v553*/, off, s32 offset:1764 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v554*/, off, s32 offset:1768 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v555*/, off, s32 offset:1772 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v556*/, off, s32 offset:1776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v557*/, off, s32 offset:1780 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v558*/, off, s32 offset:1784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v559*/, off, s32 offset:1788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v560*/, off, s32 offset:1792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v561*/, off, s32 offset:1796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v562*/, off, s32 offset:1800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v563*/, off, s32 offset:1804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v564*/, off, s32 offset:1808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v565*/, off, s32 offset:1812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v566*/, off, s32 offset:1816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v567*/, off, s32 offset:1820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v568*/, off, s32 offset:1824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v569*/, off, s32 offset:1828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v570*/, off, s32 offset:1832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v571*/, off, s32 offset:1836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v572*/, off, s32 offset:1840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v573*/, off, s32 offset:1844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v574*/, off, s32 offset:1848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v575*/, off, s32 offset:1852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v576*/, off, s32 offset:1856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v577*/, off, s32 offset:1860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v578*/, off, s32 offset:1864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v579*/, off, s32 offset:1868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v580*/, off, s32 offset:1872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v581*/, off, s32 offset:1876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v582*/, off, s32 offset:1880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v583*/, off, s32 offset:1884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v584*/, off, s32 offset:1888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v585*/, off, s32 offset:1892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v586*/, off, s32 offset:1896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v587*/, off, s32 offset:1900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v588*/, off, s32 offset:1904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v589*/, off, s32 offset:1908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v590*/, off, s32 offset:1912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v591*/, off, s32 offset:1916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v592*/, off, s32 offset:1920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v593*/, off, s32 offset:1924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v594*/, off, s32 offset:1928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v595*/, off, s32 offset:1932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v596*/, off, s32 offset:1936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v597*/, off, s32 offset:1940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v598*/, off, s32 offset:1944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v599*/, off, s32 offset:1948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v600*/, off, s32 offset:1952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v601*/, off, s32 offset:1956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v602*/, off, s32 offset:1960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v603*/, off, s32 offset:1964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v604*/, off, s32 offset:1968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v605*/, off, s32 offset:1972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v606*/, off, s32 offset:1976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v607*/, off, s32 offset:1980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v608*/, off, s32 offset:1984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v609*/, off, s32 offset:1988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v610*/, off, s32 offset:1992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v611*/, off, s32 offset:1996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v612*/, off, s32 offset:2000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v613*/, off, s32 offset:2004 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v614*/, off, s32 offset:2008 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v615*/, off, s32 offset:2012 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v616*/, off, s32 offset:2016
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v617*/, off, s32 offset:2020
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v618*/, off, s32 offset:2024
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v619*/, off, s32 offset:2028
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v620*/, off, s32 offset:2032
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v621*/, off, s32 offset:2036
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v622*/, off, s32 offset:2040
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v623*/, off, s32 offset:2044
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v624*/, off, s32 offset:2048
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v625*/, off, s32 offset:2052
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v626*/, off, s32 offset:2056
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v627*/, off, s32 offset:2060
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v628*/, off, s32 offset:2064
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v629*/, off, s32 offset:2068
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v630*/, off, s32 offset:2072
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v631*/, off, s32 offset:2076
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v632*/, off, s32 offset:2080
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v633*/, off, s32 offset:2084
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v634*/, off, s32 offset:2088
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v635*/, off, s32 offset:2092
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v636*/, off, s32 offset:2096
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v637*/, off, s32 offset:2100
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v638*/, off, s32 offset:2104
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v639*/, off, s32 offset:2108
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v640*/, off, s32 offset:2112
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v641*/, off, s32 offset:2116
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v642*/, off, s32 offset:2120
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v643*/, off, s32 offset:2124
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v644*/, off, s32 offset:2128
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v645*/, off, s32 offset:2132
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v646*/, off, s32 offset:2136
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v647*/, off, s32 offset:2140
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v648*/, off, s32 offset:2144
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v649*/, off, s32 offset:2148
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v650*/, off, s32 offset:2152
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v651*/, off, s32 offset:2156
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v652*/, off, s32 offset:2160
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v653*/, off, s32 offset:2164
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v654*/, off, s32 offset:2168
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v655*/, off, s32 offset:2172
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v656*/, off, s32 offset:2176
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v657*/, off, s32 offset:2180
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v658*/, off, s32 offset:2184
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v659*/, off, s32 offset:2188
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v660*/, off, s32 offset:2192
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v661*/, off, s32 offset:2196
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v662*/, off, s32 offset:2200
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v663*/, off, s32 offset:2204
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v664*/, off, s32 offset:2208
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v665*/, off, s32 offset:2212
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v666*/, off, s32 offset:2216
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v667*/, off, s32 offset:2220
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v668*/, off, s32 offset:2224
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v669*/, off, s32 offset:2228
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v670*/, off, s32 offset:2232
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v671*/, off, s32 offset:2236
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v672*/, off, s32 offset:2240
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v673*/, off, s32 offset:2244
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v674*/, off, s32 offset:2248
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v675*/, off, s32 offset:2252
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v676*/, off, s32 offset:2256
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v677*/, off, s32 offset:2260
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v678*/, off, s32 offset:2264
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v616*/, off, s32 offset:2016 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v617*/, off, s32 offset:2020 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v618*/, off, s32 offset:2024 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v619*/, off, s32 offset:2028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v620*/, off, s32 offset:2032 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v621*/, off, s32 offset:2036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v622*/, off, s32 offset:2040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v623*/, off, s32 offset:2044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v624*/, off, s32 offset:2048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v625*/, off, s32 offset:2052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v626*/, off, s32 offset:2056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v627*/, off, s32 offset:2060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v628*/, off, s32 offset:2064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v629*/, off, s32 offset:2068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v630*/, off, s32 offset:2072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v631*/, off, s32 offset:2076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v632*/, off, s32 offset:2080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v633*/, off, s32 offset:2084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v634*/, off, s32 offset:2088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v635*/, off, s32 offset:2092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v636*/, off, s32 offset:2096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v637*/, off, s32 offset:2100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v638*/, off, s32 offset:2104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v639*/, off, s32 offset:2108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v640*/, off, s32 offset:2112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v641*/, off, s32 offset:2116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v642*/, off, s32 offset:2120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v643*/, off, s32 offset:2124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v644*/, off, s32 offset:2128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v645*/, off, s32 offset:2132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v646*/, off, s32 offset:2136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v647*/, off, s32 offset:2140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v648*/, off, s32 offset:2144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v649*/, off, s32 offset:2148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v650*/, off, s32 offset:2152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v651*/, off, s32 offset:2156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v652*/, off, s32 offset:2160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v653*/, off, s32 offset:2164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v654*/, off, s32 offset:2168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v655*/, off, s32 offset:2172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v656*/, off, s32 offset:2176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v657*/, off, s32 offset:2180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v658*/, off, s32 offset:2184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v659*/, off, s32 offset:2188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v660*/, off, s32 offset:2192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v661*/, off, s32 offset:2196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v662*/, off, s32 offset:2200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v663*/, off, s32 offset:2204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v664*/, off, s32 offset:2208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v665*/, off, s32 offset:2212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v666*/, off, s32 offset:2216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v667*/, off, s32 offset:2220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v668*/, off, s32 offset:2224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v669*/, off, s32 offset:2228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v670*/, off, s32 offset:2232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v671*/, off, s32 offset:2236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v672*/, off, s32 offset:2240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v673*/, off, s32 offset:2244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v674*/, off, s32 offset:2248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v675*/, off, s32 offset:2252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v676*/, off, s32 offset:2256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v677*/, off, s32 offset:2260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v678*/, off, s32 offset:2264 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v679*/, off, s32 offset:2268
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v680*/, off, s32 offset:2272
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v681*/, off, s32 offset:2276
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v682*/, off, s32 offset:2280
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v683*/, off, s32 offset:2284
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v684*/, off, s32 offset:2288
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v685*/, off, s32 offset:2292
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v686*/, off, s32 offset:2296
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v687*/, off, s32 offset:2300
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v688*/, off, s32 offset:2304
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v689*/, off, s32 offset:2308
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v690*/, off, s32 offset:2312
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v691*/, off, s32 offset:2316
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v692*/, off, s32 offset:2320
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v693*/, off, s32 offset:2324
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v694*/, off, s32 offset:2328
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v695*/, off, s32 offset:2332
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v696*/, off, s32 offset:2336
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v697*/, off, s32 offset:2340
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v698*/, off, s32 offset:2344
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v699*/, off, s32 offset:2348
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v700*/, off, s32 offset:2352
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v701*/, off, s32 offset:2356
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v702*/, off, s32 offset:2360
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v703*/, off, s32 offset:2364
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v704*/, off, s32 offset:2368
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v705*/, off, s32 offset:2372
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v706*/, off, s32 offset:2376
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v707*/, off, s32 offset:2380
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v708*/, off, s32 offset:2384
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v709*/, off, s32 offset:2388
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v710*/, off, s32 offset:2392
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v711*/, off, s32 offset:2396
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v712*/, off, s32 offset:2400
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v713*/, off, s32 offset:2404
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v714*/, off, s32 offset:2408
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v715*/, off, s32 offset:2412
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v716*/, off, s32 offset:2416
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v717*/, off, s32 offset:2420
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v718*/, off, s32 offset:2424
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v719*/, off, s32 offset:2428
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v720*/, off, s32 offset:2432
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v721*/, off, s32 offset:2436
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v722*/, off, s32 offset:2440
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v723*/, off, s32 offset:2444
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v724*/, off, s32 offset:2448
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v725*/, off, s32 offset:2452
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v726*/, off, s32 offset:2456
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v727*/, off, s32 offset:2460
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v728*/, off, s32 offset:2464
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v729*/, off, s32 offset:2468
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v730*/, off, s32 offset:2472
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v731*/, off, s32 offset:2476
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v732*/, off, s32 offset:2480
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v733*/, off, s32 offset:2484
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v734*/, off, s32 offset:2488
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v735*/, off, s32 offset:2492
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v736*/, off, s32 offset:2496
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v737*/, off, s32 offset:2500
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v738*/, off, s32 offset:2504
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v739*/, off, s32 offset:2508
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v740*/, off, s32 offset:2512
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v741*/, off, s32 offset:2516
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v679*/, off, s32 offset:2268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v680*/, off, s32 offset:2272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v681*/, off, s32 offset:2276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v682*/, off, s32 offset:2280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v683*/, off, s32 offset:2284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v684*/, off, s32 offset:2288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v685*/, off, s32 offset:2292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v686*/, off, s32 offset:2296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v687*/, off, s32 offset:2300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v688*/, off, s32 offset:2304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v689*/, off, s32 offset:2308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v690*/, off, s32 offset:2312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v691*/, off, s32 offset:2316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v692*/, off, s32 offset:2320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v693*/, off, s32 offset:2324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v694*/, off, s32 offset:2328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v695*/, off, s32 offset:2332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v696*/, off, s32 offset:2336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v697*/, off, s32 offset:2340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v698*/, off, s32 offset:2344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v699*/, off, s32 offset:2348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v700*/, off, s32 offset:2352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v701*/, off, s32 offset:2356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v702*/, off, s32 offset:2360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v703*/, off, s32 offset:2364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v704*/, off, s32 offset:2368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v705*/, off, s32 offset:2372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v706*/, off, s32 offset:2376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v707*/, off, s32 offset:2380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v708*/, off, s32 offset:2384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v709*/, off, s32 offset:2388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v710*/, off, s32 offset:2392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v711*/, off, s32 offset:2396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v712*/, off, s32 offset:2400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v713*/, off, s32 offset:2404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v714*/, off, s32 offset:2408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v715*/, off, s32 offset:2412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v716*/, off, s32 offset:2416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v717*/, off, s32 offset:2420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v718*/, off, s32 offset:2424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v719*/, off, s32 offset:2428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v720*/, off, s32 offset:2432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v721*/, off, s32 offset:2436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v722*/, off, s32 offset:2440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v723*/, off, s32 offset:2444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v724*/, off, s32 offset:2448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v725*/, off, s32 offset:2452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v726*/, off, s32 offset:2456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v727*/, off, s32 offset:2460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v728*/, off, s32 offset:2464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v729*/, off, s32 offset:2468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v730*/, off, s32 offset:2472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v731*/, off, s32 offset:2476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v732*/, off, s32 offset:2480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v733*/, off, s32 offset:2484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v734*/, off, s32 offset:2488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v735*/, off, s32 offset:2492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v736*/, off, s32 offset:2496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v737*/, off, s32 offset:2500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v738*/, off, s32 offset:2504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v739*/, off, s32 offset:2508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v740*/, off, s32 offset:2512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v741*/, off, s32 offset:2516 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v742*/, off, s32 offset:2520
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v743*/, off, s32 offset:2524
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v744*/, off, s32 offset:2528
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v745*/, off, s32 offset:2532
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v746*/, off, s32 offset:2536
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v747*/, off, s32 offset:2540
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v748*/, off, s32 offset:2544
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v749*/, off, s32 offset:2548
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v750*/, off, s32 offset:2552
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v751*/, off, s32 offset:2556
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v752*/, off, s32 offset:2560
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v753*/, off, s32 offset:2564
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v754*/, off, s32 offset:2568
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v755*/, off, s32 offset:2572
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v756*/, off, s32 offset:2576
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v757*/, off, s32 offset:2580
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v758*/, off, s32 offset:2584
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v759*/, off, s32 offset:2588
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v760*/, off, s32 offset:2592
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v761*/, off, s32 offset:2596
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v762*/, off, s32 offset:2600
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v763*/, off, s32 offset:2604
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v764*/, off, s32 offset:2608
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v765*/, off, s32 offset:2612
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v766*/, off, s32 offset:2616
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v767*/, off, s32 offset:2620
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v742*/, off, s32 offset:2520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v743*/, off, s32 offset:2524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v744*/, off, s32 offset:2528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v745*/, off, s32 offset:2532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v746*/, off, s32 offset:2536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v747*/, off, s32 offset:2540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v748*/, off, s32 offset:2544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v749*/, off, s32 offset:2548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v750*/, off, s32 offset:2552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v751*/, off, s32 offset:2556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v752*/, off, s32 offset:2560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v753*/, off, s32 offset:2564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v754*/, off, s32 offset:2568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v755*/, off, s32 offset:2572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v756*/, off, s32 offset:2576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v757*/, off, s32 offset:2580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v758*/, off, s32 offset:2584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v759*/, off, s32 offset:2588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v760*/, off, s32 offset:2592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v761*/, off, s32 offset:2596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v762*/, off, s32 offset:2600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v763*/, off, s32 offset:2604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v764*/, off, s32 offset:2608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v765*/, off, s32 offset:2612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v766*/, off, s32 offset:2616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v767*/, off, s32 offset:2620 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x80c0 ; msbs: dst=3 src0=0 src1=0 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v768*/, off, s32 offset:2624
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v769*/, off, s32 offset:2628
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v770*/, off, s32 offset:2632
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v771*/, off, s32 offset:2636
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v772*/, off, s32 offset:2640
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v773*/, off, s32 offset:2644
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v774*/, off, s32 offset:2648
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v775*/, off, s32 offset:2652
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v776*/, off, s32 offset:2656
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v777*/, off, s32 offset:2660
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v778*/, off, s32 offset:2664
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v779*/, off, s32 offset:2668
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v780*/, off, s32 offset:2672
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v781*/, off, s32 offset:2676
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v782*/, off, s32 offset:2680
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v783*/, off, s32 offset:2684
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v784*/, off, s32 offset:2688
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v785*/, off, s32 offset:2692
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v786*/, off, s32 offset:2696
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v787*/, off, s32 offset:2700
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v788*/, off, s32 offset:2704
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v789*/, off, s32 offset:2708
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v790*/, off, s32 offset:2712
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v791*/, off, s32 offset:2716
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v792*/, off, s32 offset:2720
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v793*/, off, s32 offset:2724
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v794*/, off, s32 offset:2728
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v795*/, off, s32 offset:2732
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v796*/, off, s32 offset:2736
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v797*/, off, s32 offset:2740
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v798*/, off, s32 offset:2744
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v799*/, off, s32 offset:2748
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v800*/, off, s32 offset:2752
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v801*/, off, s32 offset:2756
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v802*/, off, s32 offset:2760
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v803*/, off, s32 offset:2764
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v804*/, off, s32 offset:2768
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v768*/, off, s32 offset:2624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v769*/, off, s32 offset:2628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v770*/, off, s32 offset:2632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v771*/, off, s32 offset:2636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v772*/, off, s32 offset:2640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v773*/, off, s32 offset:2644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v774*/, off, s32 offset:2648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v775*/, off, s32 offset:2652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v776*/, off, s32 offset:2656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v777*/, off, s32 offset:2660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v778*/, off, s32 offset:2664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v779*/, off, s32 offset:2668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v780*/, off, s32 offset:2672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v781*/, off, s32 offset:2676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v782*/, off, s32 offset:2680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v783*/, off, s32 offset:2684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v784*/, off, s32 offset:2688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v785*/, off, s32 offset:2692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v786*/, off, s32 offset:2696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v787*/, off, s32 offset:2700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v788*/, off, s32 offset:2704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v789*/, off, s32 offset:2708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v790*/, off, s32 offset:2712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v791*/, off, s32 offset:2716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v792*/, off, s32 offset:2720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v793*/, off, s32 offset:2724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v794*/, off, s32 offset:2728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v795*/, off, s32 offset:2732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v796*/, off, s32 offset:2736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v797*/, off, s32 offset:2740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v798*/, off, s32 offset:2744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v799*/, off, s32 offset:2748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v800*/, off, s32 offset:2752 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v801*/, off, s32 offset:2756 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v802*/, off, s32 offset:2760 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v803*/, off, s32 offset:2764 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v804*/, off, s32 offset:2768 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v805*/, off, s32 offset:2772
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v806*/, off, s32 offset:2776
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v807*/, off, s32 offset:2780
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v808*/, off, s32 offset:2784
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v809*/, off, s32 offset:2788
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v810*/, off, s32 offset:2792
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v811*/, off, s32 offset:2796
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v812*/, off, s32 offset:2800
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v813*/, off, s32 offset:2804
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v814*/, off, s32 offset:2808
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v815*/, off, s32 offset:2812
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v816*/, off, s32 offset:2816
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v817*/, off, s32 offset:2820
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v818*/, off, s32 offset:2824
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v819*/, off, s32 offset:2828
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v820*/, off, s32 offset:2832
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v821*/, off, s32 offset:2836
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v822*/, off, s32 offset:2840
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v823*/, off, s32 offset:2844
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v824*/, off, s32 offset:2848
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v825*/, off, s32 offset:2852
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v826*/, off, s32 offset:2856
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v827*/, off, s32 offset:2860
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v828*/, off, s32 offset:2864
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v829*/, off, s32 offset:2868
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v830*/, off, s32 offset:2872
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v831*/, off, s32 offset:2876
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v832*/, off, s32 offset:2880
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v833*/, off, s32 offset:2884
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v834*/, off, s32 offset:2888
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v835*/, off, s32 offset:2892
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v836*/, off, s32 offset:2896
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v837*/, off, s32 offset:2900
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v838*/, off, s32 offset:2904
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v839*/, off, s32 offset:2908
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v840*/, off, s32 offset:2912
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v841*/, off, s32 offset:2916
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v842*/, off, s32 offset:2920
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v843*/, off, s32 offset:2924
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v844*/, off, s32 offset:2928
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v845*/, off, s32 offset:2932
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v846*/, off, s32 offset:2936
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v847*/, off, s32 offset:2940
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v848*/, off, s32 offset:2944
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v849*/, off, s32 offset:2948
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v850*/, off, s32 offset:2952
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v851*/, off, s32 offset:2956
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v852*/, off, s32 offset:2960
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v853*/, off, s32 offset:2964
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v854*/, off, s32 offset:2968
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v855*/, off, s32 offset:2972
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v856*/, off, s32 offset:2976
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v857*/, off, s32 offset:2980
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v858*/, off, s32 offset:2984
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v859*/, off, s32 offset:2988
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v860*/, off, s32 offset:2992
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v861*/, off, s32 offset:2996
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v862*/, off, s32 offset:3000
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v863*/, off, s32 offset:3004
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v864*/, off, s32 offset:3008
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v865*/, off, s32 offset:3012
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v866*/, off, s32 offset:3016
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v867*/, off, s32 offset:3020
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v805*/, off, s32 offset:2772 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v806*/, off, s32 offset:2776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v807*/, off, s32 offset:2780 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v808*/, off, s32 offset:2784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v809*/, off, s32 offset:2788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v810*/, off, s32 offset:2792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v811*/, off, s32 offset:2796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v812*/, off, s32 offset:2800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v813*/, off, s32 offset:2804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v814*/, off, s32 offset:2808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v815*/, off, s32 offset:2812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v816*/, off, s32 offset:2816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v817*/, off, s32 offset:2820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v818*/, off, s32 offset:2824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v819*/, off, s32 offset:2828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v820*/, off, s32 offset:2832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v821*/, off, s32 offset:2836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v822*/, off, s32 offset:2840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v823*/, off, s32 offset:2844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v824*/, off, s32 offset:2848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v825*/, off, s32 offset:2852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v826*/, off, s32 offset:2856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v827*/, off, s32 offset:2860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v828*/, off, s32 offset:2864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v829*/, off, s32 offset:2868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v830*/, off, s32 offset:2872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v831*/, off, s32 offset:2876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v832*/, off, s32 offset:2880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v833*/, off, s32 offset:2884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v834*/, off, s32 offset:2888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v835*/, off, s32 offset:2892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v836*/, off, s32 offset:2896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v837*/, off, s32 offset:2900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v838*/, off, s32 offset:2904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v839*/, off, s32 offset:2908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v840*/, off, s32 offset:2912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v841*/, off, s32 offset:2916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v842*/, off, s32 offset:2920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v843*/, off, s32 offset:2924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v844*/, off, s32 offset:2928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v845*/, off, s32 offset:2932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v846*/, off, s32 offset:2936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v847*/, off, s32 offset:2940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v848*/, off, s32 offset:2944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v849*/, off, s32 offset:2948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v850*/, off, s32 offset:2952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v851*/, off, s32 offset:2956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v852*/, off, s32 offset:2960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v853*/, off, s32 offset:2964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v854*/, off, s32 offset:2968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v855*/, off, s32 offset:2972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v856*/, off, s32 offset:2976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v857*/, off, s32 offset:2980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v858*/, off, s32 offset:2984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v859*/, off, s32 offset:2988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v860*/, off, s32 offset:2992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v861*/, off, s32 offset:2996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v862*/, off, s32 offset:3000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v863*/, off, s32 offset:3004 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v864*/, off, s32 offset:3008 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v865*/, off, s32 offset:3012 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v866*/, off, s32 offset:3016 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v867*/, off, s32 offset:3020 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v868*/, off, s32 offset:3024
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v869*/, off, s32 offset:3028
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v870*/, off, s32 offset:3032
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v871*/, off, s32 offset:3036
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v872*/, off, s32 offset:3040
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v873*/, off, s32 offset:3044
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v874*/, off, s32 offset:3048
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v875*/, off, s32 offset:3052
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v876*/, off, s32 offset:3056
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v877*/, off, s32 offset:3060
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v878*/, off, s32 offset:3064
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v879*/, off, s32 offset:3068
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v880*/, off, s32 offset:3072
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v881*/, off, s32 offset:3076
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v882*/, off, s32 offset:3080
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v883*/, off, s32 offset:3084
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v884*/, off, s32 offset:3088
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v885*/, off, s32 offset:3092
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v886*/, off, s32 offset:3096
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v887*/, off, s32 offset:3100
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v888*/, off, s32 offset:3104
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v889*/, off, s32 offset:3108
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v890*/, off, s32 offset:3112
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v891*/, off, s32 offset:3116
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v892*/, off, s32 offset:3120
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v893*/, off, s32 offset:3124
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v894*/, off, s32 offset:3128
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v895*/, off, s32 offset:3132
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v896*/, off, s32 offset:3136
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v897*/, off, s32 offset:3140
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v898*/, off, s32 offset:3144
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v899*/, off, s32 offset:3148
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v900*/, off, s32 offset:3152
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v901*/, off, s32 offset:3156
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v902*/, off, s32 offset:3160
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v903*/, off, s32 offset:3164
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v904*/, off, s32 offset:3168
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v905*/, off, s32 offset:3172
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v906*/, off, s32 offset:3176
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v907*/, off, s32 offset:3180
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v908*/, off, s32 offset:3184
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v909*/, off, s32 offset:3188
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v910*/, off, s32 offset:3192
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v911*/, off, s32 offset:3196
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v912*/, off, s32 offset:3200
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v913*/, off, s32 offset:3204
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v914*/, off, s32 offset:3208
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v915*/, off, s32 offset:3212
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v916*/, off, s32 offset:3216
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v917*/, off, s32 offset:3220
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v918*/, off, s32 offset:3224
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v919*/, off, s32 offset:3228
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v920*/, off, s32 offset:3232
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v921*/, off, s32 offset:3236
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v922*/, off, s32 offset:3240
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v923*/, off, s32 offset:3244
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v924*/, off, s32 offset:3248
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v925*/, off, s32 offset:3252
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v926*/, off, s32 offset:3256
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v927*/, off, s32 offset:3260
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v928*/, off, s32 offset:3264
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v929*/, off, s32 offset:3268
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v930*/, off, s32 offset:3272
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v868*/, off, s32 offset:3024 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v869*/, off, s32 offset:3028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v870*/, off, s32 offset:3032 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v871*/, off, s32 offset:3036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v872*/, off, s32 offset:3040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v873*/, off, s32 offset:3044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v874*/, off, s32 offset:3048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v875*/, off, s32 offset:3052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v876*/, off, s32 offset:3056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v877*/, off, s32 offset:3060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v878*/, off, s32 offset:3064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v879*/, off, s32 offset:3068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v880*/, off, s32 offset:3072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v881*/, off, s32 offset:3076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v882*/, off, s32 offset:3080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v883*/, off, s32 offset:3084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v884*/, off, s32 offset:3088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v885*/, off, s32 offset:3092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v886*/, off, s32 offset:3096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v887*/, off, s32 offset:3100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v888*/, off, s32 offset:3104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v889*/, off, s32 offset:3108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v890*/, off, s32 offset:3112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v891*/, off, s32 offset:3116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v892*/, off, s32 offset:3120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v893*/, off, s32 offset:3124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v894*/, off, s32 offset:3128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v895*/, off, s32 offset:3132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v896*/, off, s32 offset:3136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v897*/, off, s32 offset:3140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v898*/, off, s32 offset:3144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v899*/, off, s32 offset:3148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v900*/, off, s32 offset:3152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v901*/, off, s32 offset:3156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v902*/, off, s32 offset:3160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v903*/, off, s32 offset:3164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v904*/, off, s32 offset:3168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v905*/, off, s32 offset:3172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v906*/, off, s32 offset:3176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v907*/, off, s32 offset:3180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v908*/, off, s32 offset:3184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v909*/, off, s32 offset:3188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v910*/, off, s32 offset:3192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v911*/, off, s32 offset:3196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v912*/, off, s32 offset:3200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v913*/, off, s32 offset:3204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v914*/, off, s32 offset:3208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v915*/, off, s32 offset:3212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v916*/, off, s32 offset:3216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v917*/, off, s32 offset:3220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v918*/, off, s32 offset:3224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v919*/, off, s32 offset:3228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v920*/, off, s32 offset:3232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v921*/, off, s32 offset:3236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v922*/, off, s32 offset:3240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v923*/, off, s32 offset:3244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v924*/, off, s32 offset:3248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v925*/, off, s32 offset:3252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v926*/, off, s32 offset:3256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v927*/, off, s32 offset:3260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v928*/, off, s32 offset:3264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v929*/, off, s32 offset:3268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v930*/, off, s32 offset:3272 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v931*/, off, s32 offset:3276
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v932*/, off, s32 offset:3280
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v933*/, off, s32 offset:3284
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v934*/, off, s32 offset:3288
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v935*/, off, s32 offset:3292
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v936*/, off, s32 offset:3296
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v937*/, off, s32 offset:3300
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v938*/, off, s32 offset:3304
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v939*/, off, s32 offset:3308
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v940*/, off, s32 offset:3312
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v941*/, off, s32 offset:3316
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v942*/, off, s32 offset:3320
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v943*/, off, s32 offset:3324
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v944*/, off, s32 offset:3328
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v945*/, off, s32 offset:3332
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v946*/, off, s32 offset:3336
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v947*/, off, s32 offset:3340
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v948*/, off, s32 offset:3344
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v949*/, off, s32 offset:3348
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v950*/, off, s32 offset:3352
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v951*/, off, s32 offset:3356
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v952*/, off, s32 offset:3360
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v953*/, off, s32 offset:3364
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v954*/, off, s32 offset:3368
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v955*/, off, s32 offset:3372
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v956*/, off, s32 offset:3376
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v957*/, off, s32 offset:3380
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v958*/, off, s32 offset:3384
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v959*/, off, s32 offset:3388
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v960*/, off, s32 offset:3392
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v961*/, off, s32 offset:3396
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v962*/, off, s32 offset:3400
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v963*/, off, s32 offset:3404
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v964*/, off, s32 offset:3408
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v965*/, off, s32 offset:3412
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v966*/, off, s32 offset:3416
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v967*/, off, s32 offset:3420
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v968*/, off, s32 offset:3424
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v969*/, off, s32 offset:3428
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v970*/, off, s32 offset:3432
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v971*/, off, s32 offset:3436
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v972*/, off, s32 offset:3440
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v973*/, off, s32 offset:3444
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v974*/, off, s32 offset:3448
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v975*/, off, s32 offset:3452
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v976*/, off, s32 offset:3456
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v977*/, off, s32 offset:3460
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v978*/, off, s32 offset:3464
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v979*/, off, s32 offset:3468
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v980*/, off, s32 offset:3472
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v981*/, off, s32 offset:3476
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v982*/, off, s32 offset:3480
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v983*/, off, s32 offset:3484
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v984*/, off, s32 offset:3488
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v985*/, off, s32 offset:3492
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v986*/, off, s32 offset:3496
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v987*/, off, s32 offset:3500
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v988*/, off, s32 offset:3504
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v989*/, off, s32 offset:3508
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v990*/, off, s32 offset:3512
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v991*/, off, s32 offset:3516
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v992*/, off, s32 offset:3520
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v993*/, off, s32 offset:3524
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v931*/, off, s32 offset:3276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v932*/, off, s32 offset:3280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v933*/, off, s32 offset:3284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v934*/, off, s32 offset:3288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v935*/, off, s32 offset:3292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v936*/, off, s32 offset:3296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v937*/, off, s32 offset:3300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v938*/, off, s32 offset:3304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v939*/, off, s32 offset:3308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v940*/, off, s32 offset:3312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v941*/, off, s32 offset:3316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v942*/, off, s32 offset:3320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v943*/, off, s32 offset:3324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v944*/, off, s32 offset:3328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v945*/, off, s32 offset:3332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v946*/, off, s32 offset:3336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v947*/, off, s32 offset:3340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v948*/, off, s32 offset:3344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v949*/, off, s32 offset:3348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v950*/, off, s32 offset:3352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v951*/, off, s32 offset:3356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v952*/, off, s32 offset:3360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v953*/, off, s32 offset:3364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v954*/, off, s32 offset:3368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v955*/, off, s32 offset:3372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v956*/, off, s32 offset:3376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v957*/, off, s32 offset:3380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v958*/, off, s32 offset:3384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v959*/, off, s32 offset:3388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v960*/, off, s32 offset:3392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v961*/, off, s32 offset:3396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v962*/, off, s32 offset:3400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v963*/, off, s32 offset:3404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v964*/, off, s32 offset:3408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v965*/, off, s32 offset:3412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v966*/, off, s32 offset:3416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v967*/, off, s32 offset:3420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v968*/, off, s32 offset:3424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v969*/, off, s32 offset:3428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v970*/, off, s32 offset:3432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v971*/, off, s32 offset:3436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v972*/, off, s32 offset:3440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v973*/, off, s32 offset:3444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v974*/, off, s32 offset:3448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v975*/, off, s32 offset:3452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v976*/, off, s32 offset:3456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v977*/, off, s32 offset:3460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v978*/, off, s32 offset:3464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v979*/, off, s32 offset:3468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v980*/, off, s32 offset:3472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v981*/, off, s32 offset:3476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v982*/, off, s32 offset:3480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v983*/, off, s32 offset:3484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v984*/, off, s32 offset:3488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v985*/, off, s32 offset:3492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v986*/, off, s32 offset:3496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v987*/, off, s32 offset:3500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v988*/, off, s32 offset:3504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v989*/, off, s32 offset:3508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v990*/, off, s32 offset:3512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v991*/, off, s32 offset:3516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v992*/, off, s32 offset:3520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v993*/, off, s32 offset:3524 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x1d ; 120-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v994*/, off, s32 offset:3528
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v995*/, off, s32 offset:3532
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v996*/, off, s32 offset:3536
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v997*/, off, s32 offset:3540
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v998*/, off, s32 offset:3544
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v999*/, off, s32 offset:3548
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v1000*/, off, s32 offset:3552
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v1001*/, off, s32 offset:3556
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v1002*/, off, s32 offset:3560
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v1003*/, off, s32 offset:3564
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v1004*/, off, s32 offset:3568
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v1005*/, off, s32 offset:3572
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v1006*/, off, s32 offset:3576
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v1007*/, off, s32 offset:3580
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v1008*/, off, s32 offset:3584
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v1009*/, off, s32 offset:3588
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v1010*/, off, s32 offset:3592
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v1011*/, off, s32 offset:3596
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v1012*/, off, s32 offset:3600
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v1013*/, off, s32 offset:3604
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v1014*/, off, s32 offset:3608
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v1015*/, off, s32 offset:3612
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v1016*/, off, s32 offset:3616
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v1017*/, off, s32 offset:3620
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v1018*/, off, s32 offset:3624
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v1019*/, off, s32 offset:3628
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v1020*/, off, s32 offset:3632
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v1021*/, off, s32 offset:3636
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v1022*/, off, s32 offset:3640
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v1023*/, off, s32 offset:3644
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v994*/, off, s32 offset:3528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v995*/, off, s32 offset:3532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v996*/, off, s32 offset:3536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v997*/, off, s32 offset:3540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v998*/, off, s32 offset:3544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v999*/, off, s32 offset:3548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v1000*/, off, s32 offset:3552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v1001*/, off, s32 offset:3556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v1002*/, off, s32 offset:3560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v1003*/, off, s32 offset:3564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v1004*/, off, s32 offset:3568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v1005*/, off, s32 offset:3572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v1006*/, off, s32 offset:3576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v1007*/, off, s32 offset:3580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v1008*/, off, s32 offset:3584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v1009*/, off, s32 offset:3588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v1010*/, off, s32 offset:3592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v1011*/, off, s32 offset:3596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v1012*/, off, s32 offset:3600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v1013*/, off, s32 offset:3604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v1014*/, off, s32 offset:3608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v1015*/, off, s32 offset:3612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v1016*/, off, s32 offset:3616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v1017*/, off, s32 offset:3620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v1018*/, off, s32 offset:3624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v1019*/, off, s32 offset:3628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v1020*/, off, s32 offset:3632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v1021*/, off, s32 offset:3636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v1022*/, off, s32 offset:3640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v1023*/, off, s32 offset:3644 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, s0
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0xc000 ; msbs: dst=0 src0=0 src1=0 src2=0
@@ -9251,942 +9251,942 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float>
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 s33, s32
 ; GFX1250-DAGISEL-NEXT:    s_xor_saveexec_b32 s4, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s33 offset:4
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s33 offset:8
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2, s33 offset:12
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3, s33 offset:16
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4, s33 offset:20
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5, s33 offset:24
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6, s33 offset:28
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7, s33 offset:32
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8, s33 offset:36
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9, s33 offset:40
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10, s33 offset:44
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11, s33 offset:48
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12, s33 offset:52
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13, s33 offset:56
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14, s33 offset:60
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15, s33 offset:64
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16, s33 offset:68
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17, s33 offset:72
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18, s33 offset:76
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19, s33 offset:80
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20, s33 offset:84
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21, s33 offset:88
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22, s33 offset:92
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23, s33 offset:96
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24, s33 offset:100
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25, s33 offset:104
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26, s33 offset:108
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27, s33 offset:112
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28, s33 offset:116
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29, s33 offset:120
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30, s33 offset:124
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31, s33 offset:128
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32, s33 offset:132
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33, s33 offset:136
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34, s33 offset:140
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35, s33 offset:144
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36, s33 offset:148
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37, s33 offset:152
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38, s33 offset:156
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39, s33 offset:160
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48, s33 offset:172
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49, s33 offset:176
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50, s33 offset:180
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51, s33 offset:184
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52, s33 offset:188
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53, s33 offset:192
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54, s33 offset:196
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55, s33 offset:200
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64, s33 offset:204
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65, s33 offset:208
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66, s33 offset:212
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67, s33 offset:216
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68, s33 offset:220
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69, s33 offset:224
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70, s33 offset:228
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71, s33 offset:232
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80, s33 offset:236
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81, s33 offset:240
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82, s33 offset:244
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83, s33 offset:248
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84, s33 offset:252
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85, s33 offset:256
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86, s33 offset:260
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0, s33 offset:4 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1, s33 offset:8 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2, s33 offset:12 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3, s33 offset:16 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4, s33 offset:20 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5, s33 offset:24 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6, s33 offset:28 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7, s33 offset:32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8, s33 offset:36 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9, s33 offset:40 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10, s33 offset:44 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11, s33 offset:48 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12, s33 offset:52 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13, s33 offset:56 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14, s33 offset:60 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15, s33 offset:64 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16, s33 offset:68 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17, s33 offset:72 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18, s33 offset:76 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19, s33 offset:80 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20, s33 offset:84 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21, s33 offset:88 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22, s33 offset:92 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23, s33 offset:96 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24, s33 offset:100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25, s33 offset:104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26, s33 offset:108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27, s33 offset:112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28, s33 offset:116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29, s33 offset:120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30, s33 offset:124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31, s33 offset:128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32, s33 offset:132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33, s33 offset:136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34, s33 offset:140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35, s33 offset:144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36, s33 offset:148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37, s33 offset:152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38, s33 offset:156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39, s33 offset:160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48, s33 offset:172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49, s33 offset:176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50, s33 offset:180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51, s33 offset:184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52, s33 offset:188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53, s33 offset:192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54, s33 offset:196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55, s33 offset:200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64, s33 offset:204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65, s33 offset:208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66, s33 offset:212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67, s33 offset:216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68, s33 offset:220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69, s33 offset:224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70, s33 offset:228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71, s33 offset:232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80, s33 offset:236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81, s33 offset:240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82, s33 offset:244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83, s33 offset:248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84, s33 offset:252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85, s33 offset:256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86, s33 offset:260 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87, s33 offset:264
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96, s33 offset:268
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97, s33 offset:272
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98, s33 offset:276
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99, s33 offset:280
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100, s33 offset:284
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101, s33 offset:288
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102, s33 offset:292
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103, s33 offset:296
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112, s33 offset:300
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113, s33 offset:304
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114, s33 offset:308
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115, s33 offset:312
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116, s33 offset:316
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117, s33 offset:320
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118, s33 offset:324
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119, s33 offset:328
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128, s33 offset:332
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129, s33 offset:336
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130, s33 offset:340
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131, s33 offset:344
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132, s33 offset:348
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133, s33 offset:352
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134, s33 offset:356
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135, s33 offset:360
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144, s33 offset:364
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145, s33 offset:368
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146, s33 offset:372
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147, s33 offset:376
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148, s33 offset:380
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149, s33 offset:384
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150, s33 offset:388
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151, s33 offset:392
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160, s33 offset:396
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161, s33 offset:400
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162, s33 offset:404
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163, s33 offset:408
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164, s33 offset:412
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165, s33 offset:416
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166, s33 offset:420
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167, s33 offset:424
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176, s33 offset:428
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177, s33 offset:432
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178, s33 offset:436
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179, s33 offset:440
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180, s33 offset:444
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181, s33 offset:448
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182, s33 offset:452
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183, s33 offset:456
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192, s33 offset:460
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193, s33 offset:464
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194, s33 offset:468
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195, s33 offset:472
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196, s33 offset:476
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197, s33 offset:480
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198, s33 offset:484
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199, s33 offset:488
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208, s33 offset:492
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209, s33 offset:496
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210, s33 offset:500
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211, s33 offset:504
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212, s33 offset:508
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213, s33 offset:512
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87, s33 offset:264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96, s33 offset:268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97, s33 offset:272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98, s33 offset:276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99, s33 offset:280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100, s33 offset:284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101, s33 offset:288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102, s33 offset:292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103, s33 offset:296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112, s33 offset:300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113, s33 offset:304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114, s33 offset:308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115, s33 offset:312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116, s33 offset:316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117, s33 offset:320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118, s33 offset:324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119, s33 offset:328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128, s33 offset:332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129, s33 offset:336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130, s33 offset:340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131, s33 offset:344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132, s33 offset:348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133, s33 offset:352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134, s33 offset:356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135, s33 offset:360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144, s33 offset:364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145, s33 offset:368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146, s33 offset:372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147, s33 offset:376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148, s33 offset:380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149, s33 offset:384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150, s33 offset:388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151, s33 offset:392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160, s33 offset:396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161, s33 offset:400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162, s33 offset:404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163, s33 offset:408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164, s33 offset:412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165, s33 offset:416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166, s33 offset:420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167, s33 offset:424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176, s33 offset:428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177, s33 offset:432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178, s33 offset:436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179, s33 offset:440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180, s33 offset:444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181, s33 offset:448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182, s33 offset:452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183, s33 offset:456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192, s33 offset:460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193, s33 offset:464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194, s33 offset:468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195, s33 offset:472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196, s33 offset:476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197, s33 offset:480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198, s33 offset:484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199, s33 offset:488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208, s33 offset:492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209, s33 offset:496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210, s33 offset:500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211, s33 offset:504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212, s33 offset:508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213, s33 offset:512 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214, s33 offset:516
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215, s33 offset:520
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224, s33 offset:524
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225, s33 offset:528
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226, s33 offset:532
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227, s33 offset:536
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228, s33 offset:540
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229, s33 offset:544
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230, s33 offset:548
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231, s33 offset:552
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240, s33 offset:556
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241, s33 offset:560
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242, s33 offset:564
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243, s33 offset:568
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244, s33 offset:572
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245, s33 offset:576
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246, s33 offset:580
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247, s33 offset:584
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214, s33 offset:516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215, s33 offset:520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224, s33 offset:524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225, s33 offset:528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226, s33 offset:532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227, s33 offset:536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228, s33 offset:540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229, s33 offset:544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230, s33 offset:548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231, s33 offset:552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240, s33 offset:556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241, s33 offset:560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242, s33 offset:564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243, s33 offset:568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244, s33 offset:572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245, s33 offset:576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246, s33 offset:580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247, s33 offset:584 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 4 ; msbs: dst=0 src0=0 src1=1 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v256*/, s33 offset:588
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v257*/, s33 offset:592
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v258*/, s33 offset:596
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v259*/, s33 offset:600
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v260*/, s33 offset:604
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v261*/, s33 offset:608
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v262*/, s33 offset:612
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v263*/, s33 offset:616
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v264*/, s33 offset:620
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v265*/, s33 offset:624
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v266*/, s33 offset:628
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v267*/, s33 offset:632
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v268*/, s33 offset:636
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v269*/, s33 offset:640
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v270*/, s33 offset:644
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v271*/, s33 offset:648
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v272*/, s33 offset:652
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v273*/, s33 offset:656
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v274*/, s33 offset:660
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v275*/, s33 offset:664
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v276*/, s33 offset:668
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v277*/, s33 offset:672
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v278*/, s33 offset:676
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v279*/, s33 offset:680
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v280*/, s33 offset:684
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v281*/, s33 offset:688
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v282*/, s33 offset:692
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v283*/, s33 offset:696
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v284*/, s33 offset:700
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v285*/, s33 offset:704
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v286*/, s33 offset:708
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v287*/, s33 offset:712
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v288*/, s33 offset:716
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v289*/, s33 offset:720
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v290*/, s33 offset:724
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v291*/, s33 offset:728
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v292*/, s33 offset:732
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v293*/, s33 offset:736
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v294*/, s33 offset:740
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v295*/, s33 offset:744
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v296*/, s33 offset:748
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v297*/, s33 offset:752
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v298*/, s33 offset:756
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v299*/, s33 offset:760
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v300*/, s33 offset:764
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v256*/, s33 offset:588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v257*/, s33 offset:592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v258*/, s33 offset:596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v259*/, s33 offset:600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v260*/, s33 offset:604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v261*/, s33 offset:608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v262*/, s33 offset:612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v263*/, s33 offset:616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v264*/, s33 offset:620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v265*/, s33 offset:624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v266*/, s33 offset:628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v267*/, s33 offset:632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v268*/, s33 offset:636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v269*/, s33 offset:640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v270*/, s33 offset:644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v271*/, s33 offset:648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v272*/, s33 offset:652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v273*/, s33 offset:656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v274*/, s33 offset:660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v275*/, s33 offset:664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v276*/, s33 offset:668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v277*/, s33 offset:672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v278*/, s33 offset:676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v279*/, s33 offset:680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v280*/, s33 offset:684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v281*/, s33 offset:688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v282*/, s33 offset:692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v283*/, s33 offset:696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v284*/, s33 offset:700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v285*/, s33 offset:704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v286*/, s33 offset:708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v287*/, s33 offset:712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v288*/, s33 offset:716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v289*/, s33 offset:720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v290*/, s33 offset:724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v291*/, s33 offset:728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v292*/, s33 offset:732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v293*/, s33 offset:736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v294*/, s33 offset:740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v295*/, s33 offset:744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v296*/, s33 offset:748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v297*/, s33 offset:752 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v298*/, s33 offset:756 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v299*/, s33 offset:760 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v300*/, s33 offset:764 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v301*/, s33 offset:768
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v302*/, s33 offset:772
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v303*/, s33 offset:776
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v304*/, s33 offset:780
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v305*/, s33 offset:784
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v306*/, s33 offset:788
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v307*/, s33 offset:792
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v308*/, s33 offset:796
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v309*/, s33 offset:800
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v310*/, s33 offset:804
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v311*/, s33 offset:808
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v312*/, s33 offset:812
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v313*/, s33 offset:816
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v314*/, s33 offset:820
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v315*/, s33 offset:824
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v316*/, s33 offset:828
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v317*/, s33 offset:832
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v318*/, s33 offset:836
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v319*/, s33 offset:840
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v320*/, s33 offset:844
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v321*/, s33 offset:848
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v322*/, s33 offset:852
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v323*/, s33 offset:856
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v324*/, s33 offset:860
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v325*/, s33 offset:864
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v326*/, s33 offset:868
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v327*/, s33 offset:872
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v328*/, s33 offset:876
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v329*/, s33 offset:880
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v330*/, s33 offset:884
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v331*/, s33 offset:888
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v332*/, s33 offset:892
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v333*/, s33 offset:896
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v334*/, s33 offset:900
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v335*/, s33 offset:904
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v336*/, s33 offset:908
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v337*/, s33 offset:912
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v338*/, s33 offset:916
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v339*/, s33 offset:920
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v340*/, s33 offset:924
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v341*/, s33 offset:928
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v342*/, s33 offset:932
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v343*/, s33 offset:936
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v344*/, s33 offset:940
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v345*/, s33 offset:944
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v346*/, s33 offset:948
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v347*/, s33 offset:952
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v348*/, s33 offset:956
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v349*/, s33 offset:960
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v350*/, s33 offset:964
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v351*/, s33 offset:968
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v352*/, s33 offset:972
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v353*/, s33 offset:976
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v354*/, s33 offset:980
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v355*/, s33 offset:984
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v356*/, s33 offset:988
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v357*/, s33 offset:992
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v358*/, s33 offset:996
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v359*/, s33 offset:1000
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v360*/, s33 offset:1004
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v361*/, s33 offset:1008
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v362*/, s33 offset:1012
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v363*/, s33 offset:1016
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v301*/, s33 offset:768 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v302*/, s33 offset:772 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v303*/, s33 offset:776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v304*/, s33 offset:780 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v305*/, s33 offset:784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v306*/, s33 offset:788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v307*/, s33 offset:792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v308*/, s33 offset:796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v309*/, s33 offset:800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v310*/, s33 offset:804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v311*/, s33 offset:808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v312*/, s33 offset:812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v313*/, s33 offset:816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v314*/, s33 offset:820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v315*/, s33 offset:824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v316*/, s33 offset:828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v317*/, s33 offset:832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v318*/, s33 offset:836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v319*/, s33 offset:840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v320*/, s33 offset:844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v321*/, s33 offset:848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v322*/, s33 offset:852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v323*/, s33 offset:856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v324*/, s33 offset:860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v325*/, s33 offset:864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v326*/, s33 offset:868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v327*/, s33 offset:872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v328*/, s33 offset:876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v329*/, s33 offset:880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v330*/, s33 offset:884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v331*/, s33 offset:888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v332*/, s33 offset:892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v333*/, s33 offset:896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v334*/, s33 offset:900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v335*/, s33 offset:904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v336*/, s33 offset:908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v337*/, s33 offset:912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v338*/, s33 offset:916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v339*/, s33 offset:920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v340*/, s33 offset:924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v341*/, s33 offset:928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v342*/, s33 offset:932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v343*/, s33 offset:936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v344*/, s33 offset:940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v345*/, s33 offset:944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v346*/, s33 offset:948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v347*/, s33 offset:952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v348*/, s33 offset:956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v349*/, s33 offset:960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v350*/, s33 offset:964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v351*/, s33 offset:968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v352*/, s33 offset:972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v353*/, s33 offset:976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v354*/, s33 offset:980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v355*/, s33 offset:984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v356*/, s33 offset:988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v357*/, s33 offset:992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v358*/, s33 offset:996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v359*/, s33 offset:1000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v360*/, s33 offset:1004 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v361*/, s33 offset:1008 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v362*/, s33 offset:1012 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v363*/, s33 offset:1016 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v364*/, s33 offset:1020
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v365*/, s33 offset:1024
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v366*/, s33 offset:1028
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v367*/, s33 offset:1032
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v368*/, s33 offset:1036
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v369*/, s33 offset:1040
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v370*/, s33 offset:1044
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v371*/, s33 offset:1048
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v372*/, s33 offset:1052
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v373*/, s33 offset:1056
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v374*/, s33 offset:1060
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v375*/, s33 offset:1064
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v376*/, s33 offset:1068
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v377*/, s33 offset:1072
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v378*/, s33 offset:1076
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v379*/, s33 offset:1080
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v380*/, s33 offset:1084
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v381*/, s33 offset:1088
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v382*/, s33 offset:1092
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v383*/, s33 offset:1096
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v384*/, s33 offset:1100
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v385*/, s33 offset:1104
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v386*/, s33 offset:1108
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v387*/, s33 offset:1112
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v388*/, s33 offset:1116
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v389*/, s33 offset:1120
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v390*/, s33 offset:1124
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v391*/, s33 offset:1128
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v392*/, s33 offset:1132
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v393*/, s33 offset:1136
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v394*/, s33 offset:1140
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v395*/, s33 offset:1144
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v396*/, s33 offset:1148
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v397*/, s33 offset:1152
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v398*/, s33 offset:1156
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v399*/, s33 offset:1160
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v400*/, s33 offset:1164
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v401*/, s33 offset:1168
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v402*/, s33 offset:1172
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v403*/, s33 offset:1176
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v404*/, s33 offset:1180
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v405*/, s33 offset:1184
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v406*/, s33 offset:1188
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v407*/, s33 offset:1192
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v408*/, s33 offset:1196
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v409*/, s33 offset:1200
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v410*/, s33 offset:1204
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v411*/, s33 offset:1208
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v412*/, s33 offset:1212
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v413*/, s33 offset:1216
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v414*/, s33 offset:1220
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v415*/, s33 offset:1224
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v416*/, s33 offset:1228
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v417*/, s33 offset:1232
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v418*/, s33 offset:1236
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v419*/, s33 offset:1240
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v420*/, s33 offset:1244
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v421*/, s33 offset:1248
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v422*/, s33 offset:1252
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v423*/, s33 offset:1256
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v424*/, s33 offset:1260
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v425*/, s33 offset:1264
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v426*/, s33 offset:1268
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v364*/, s33 offset:1020 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v365*/, s33 offset:1024 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v366*/, s33 offset:1028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v367*/, s33 offset:1032 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v368*/, s33 offset:1036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v369*/, s33 offset:1040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v370*/, s33 offset:1044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v371*/, s33 offset:1048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v372*/, s33 offset:1052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v373*/, s33 offset:1056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v374*/, s33 offset:1060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v375*/, s33 offset:1064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v376*/, s33 offset:1068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v377*/, s33 offset:1072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v378*/, s33 offset:1076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v379*/, s33 offset:1080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v380*/, s33 offset:1084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v381*/, s33 offset:1088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v382*/, s33 offset:1092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v383*/, s33 offset:1096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v384*/, s33 offset:1100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v385*/, s33 offset:1104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v386*/, s33 offset:1108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v387*/, s33 offset:1112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v388*/, s33 offset:1116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v389*/, s33 offset:1120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v390*/, s33 offset:1124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v391*/, s33 offset:1128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v392*/, s33 offset:1132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v393*/, s33 offset:1136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v394*/, s33 offset:1140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v395*/, s33 offset:1144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v396*/, s33 offset:1148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v397*/, s33 offset:1152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v398*/, s33 offset:1156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v399*/, s33 offset:1160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v400*/, s33 offset:1164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v401*/, s33 offset:1168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v402*/, s33 offset:1172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v403*/, s33 offset:1176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v404*/, s33 offset:1180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v405*/, s33 offset:1184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v406*/, s33 offset:1188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v407*/, s33 offset:1192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v408*/, s33 offset:1196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v409*/, s33 offset:1200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v410*/, s33 offset:1204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v411*/, s33 offset:1208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v412*/, s33 offset:1212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v413*/, s33 offset:1216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v414*/, s33 offset:1220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v415*/, s33 offset:1224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v416*/, s33 offset:1228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v417*/, s33 offset:1232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v418*/, s33 offset:1236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v419*/, s33 offset:1240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v420*/, s33 offset:1244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v421*/, s33 offset:1248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v422*/, s33 offset:1252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v423*/, s33 offset:1256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v424*/, s33 offset:1260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v425*/, s33 offset:1264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v426*/, s33 offset:1268 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v427*/, s33 offset:1272
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v428*/, s33 offset:1276
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v429*/, s33 offset:1280
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v430*/, s33 offset:1284
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v431*/, s33 offset:1288
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v432*/, s33 offset:1292
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v433*/, s33 offset:1296
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v434*/, s33 offset:1300
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v435*/, s33 offset:1304
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v436*/, s33 offset:1308
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v437*/, s33 offset:1312
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v438*/, s33 offset:1316
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v439*/, s33 offset:1320
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v440*/, s33 offset:1324
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v441*/, s33 offset:1328
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v442*/, s33 offset:1332
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v443*/, s33 offset:1336
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v444*/, s33 offset:1340
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v445*/, s33 offset:1344
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v446*/, s33 offset:1348
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v447*/, s33 offset:1352
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v448*/, s33 offset:1356
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v449*/, s33 offset:1360
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v450*/, s33 offset:1364
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v451*/, s33 offset:1368
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v452*/, s33 offset:1372
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v453*/, s33 offset:1376
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v454*/, s33 offset:1380
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v455*/, s33 offset:1384
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v456*/, s33 offset:1388
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v457*/, s33 offset:1392
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v458*/, s33 offset:1396
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v459*/, s33 offset:1400
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v460*/, s33 offset:1404
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v461*/, s33 offset:1408
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v462*/, s33 offset:1412
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v463*/, s33 offset:1416
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v464*/, s33 offset:1420
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v465*/, s33 offset:1424
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v466*/, s33 offset:1428
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v467*/, s33 offset:1432
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v468*/, s33 offset:1436
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v469*/, s33 offset:1440
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v470*/, s33 offset:1444
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v471*/, s33 offset:1448
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v472*/, s33 offset:1452
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v473*/, s33 offset:1456
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v474*/, s33 offset:1460
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v475*/, s33 offset:1464
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v476*/, s33 offset:1468
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v477*/, s33 offset:1472
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v478*/, s33 offset:1476
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v479*/, s33 offset:1480
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v480*/, s33 offset:1484
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v481*/, s33 offset:1488
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v482*/, s33 offset:1492
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v483*/, s33 offset:1496
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v484*/, s33 offset:1500
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v485*/, s33 offset:1504
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v486*/, s33 offset:1508
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v487*/, s33 offset:1512
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v488*/, s33 offset:1516
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v489*/, s33 offset:1520
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v427*/, s33 offset:1272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v428*/, s33 offset:1276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v429*/, s33 offset:1280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v430*/, s33 offset:1284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v431*/, s33 offset:1288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v432*/, s33 offset:1292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v433*/, s33 offset:1296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v434*/, s33 offset:1300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v435*/, s33 offset:1304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v436*/, s33 offset:1308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v437*/, s33 offset:1312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v438*/, s33 offset:1316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v439*/, s33 offset:1320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v440*/, s33 offset:1324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v441*/, s33 offset:1328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v442*/, s33 offset:1332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v443*/, s33 offset:1336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v444*/, s33 offset:1340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v445*/, s33 offset:1344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v446*/, s33 offset:1348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v447*/, s33 offset:1352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v448*/, s33 offset:1356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v449*/, s33 offset:1360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v450*/, s33 offset:1364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v451*/, s33 offset:1368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v452*/, s33 offset:1372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v453*/, s33 offset:1376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v454*/, s33 offset:1380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v455*/, s33 offset:1384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v456*/, s33 offset:1388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v457*/, s33 offset:1392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v458*/, s33 offset:1396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v459*/, s33 offset:1400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v460*/, s33 offset:1404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v461*/, s33 offset:1408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v462*/, s33 offset:1412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v463*/, s33 offset:1416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v464*/, s33 offset:1420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v465*/, s33 offset:1424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v466*/, s33 offset:1428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v467*/, s33 offset:1432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v468*/, s33 offset:1436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v469*/, s33 offset:1440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v470*/, s33 offset:1444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v471*/, s33 offset:1448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v472*/, s33 offset:1452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v473*/, s33 offset:1456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v474*/, s33 offset:1460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v475*/, s33 offset:1464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v476*/, s33 offset:1468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v477*/, s33 offset:1472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v478*/, s33 offset:1476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v479*/, s33 offset:1480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v480*/, s33 offset:1484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v481*/, s33 offset:1488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v482*/, s33 offset:1492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v483*/, s33 offset:1496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v484*/, s33 offset:1500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v485*/, s33 offset:1504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v486*/, s33 offset:1508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v487*/, s33 offset:1512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v488*/, s33 offset:1516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v489*/, s33 offset:1520 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v490*/, s33 offset:1524
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v491*/, s33 offset:1528
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v492*/, s33 offset:1532
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v493*/, s33 offset:1536
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v494*/, s33 offset:1540
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v495*/, s33 offset:1544
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v496*/, s33 offset:1548
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v497*/, s33 offset:1552
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v498*/, s33 offset:1556
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v499*/, s33 offset:1560
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v500*/, s33 offset:1564
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v501*/, s33 offset:1568
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v502*/, s33 offset:1572
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v503*/, s33 offset:1576
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v504*/, s33 offset:1580
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v505*/, s33 offset:1584
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v506*/, s33 offset:1588
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v507*/, s33 offset:1592
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v508*/, s33 offset:1596
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v509*/, s33 offset:1600
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v510*/, s33 offset:1604
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v511*/, s33 offset:1608
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v490*/, s33 offset:1524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v491*/, s33 offset:1528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v492*/, s33 offset:1532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v493*/, s33 offset:1536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v494*/, s33 offset:1540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v495*/, s33 offset:1544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v496*/, s33 offset:1548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v497*/, s33 offset:1552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v498*/, s33 offset:1556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v499*/, s33 offset:1560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v500*/, s33 offset:1564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v501*/, s33 offset:1568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v502*/, s33 offset:1572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v503*/, s33 offset:1576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v504*/, s33 offset:1580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v505*/, s33 offset:1584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v506*/, s33 offset:1588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v507*/, s33 offset:1592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v508*/, s33 offset:1596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v509*/, s33 offset:1600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v510*/, s33 offset:1604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v511*/, s33 offset:1608 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x408 ; msbs: dst=0 src0=0 src1=2 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v512*/, s33 offset:1612
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v513*/, s33 offset:1616
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v514*/, s33 offset:1620
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v515*/, s33 offset:1624
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v516*/, s33 offset:1628
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v517*/, s33 offset:1632
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v518*/, s33 offset:1636
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v519*/, s33 offset:1640
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v520*/, s33 offset:1644
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v521*/, s33 offset:1648
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v522*/, s33 offset:1652
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v523*/, s33 offset:1656
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v524*/, s33 offset:1660
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v525*/, s33 offset:1664
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v526*/, s33 offset:1668
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v527*/, s33 offset:1672
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v528*/, s33 offset:1676
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v529*/, s33 offset:1680
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v530*/, s33 offset:1684
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v531*/, s33 offset:1688
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v532*/, s33 offset:1692
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v533*/, s33 offset:1696
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v534*/, s33 offset:1700
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v535*/, s33 offset:1704
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v536*/, s33 offset:1708
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v537*/, s33 offset:1712
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v538*/, s33 offset:1716
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v539*/, s33 offset:1720
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v540*/, s33 offset:1724
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v541*/, s33 offset:1728
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v542*/, s33 offset:1732
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v543*/, s33 offset:1736
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v544*/, s33 offset:1740
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v545*/, s33 offset:1744
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v546*/, s33 offset:1748
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v547*/, s33 offset:1752
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v548*/, s33 offset:1756
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v549*/, s33 offset:1760
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v550*/, s33 offset:1764
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v551*/, s33 offset:1768
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v552*/, s33 offset:1772
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v512*/, s33 offset:1612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v513*/, s33 offset:1616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v514*/, s33 offset:1620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v515*/, s33 offset:1624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v516*/, s33 offset:1628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v517*/, s33 offset:1632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v518*/, s33 offset:1636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v519*/, s33 offset:1640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v520*/, s33 offset:1644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v521*/, s33 offset:1648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v522*/, s33 offset:1652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v523*/, s33 offset:1656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v524*/, s33 offset:1660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v525*/, s33 offset:1664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v526*/, s33 offset:1668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v527*/, s33 offset:1672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v528*/, s33 offset:1676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v529*/, s33 offset:1680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v530*/, s33 offset:1684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v531*/, s33 offset:1688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v532*/, s33 offset:1692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v533*/, s33 offset:1696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v534*/, s33 offset:1700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v535*/, s33 offset:1704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v536*/, s33 offset:1708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v537*/, s33 offset:1712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v538*/, s33 offset:1716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v539*/, s33 offset:1720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v540*/, s33 offset:1724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v541*/, s33 offset:1728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v542*/, s33 offset:1732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v543*/, s33 offset:1736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v544*/, s33 offset:1740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v545*/, s33 offset:1744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v546*/, s33 offset:1748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v547*/, s33 offset:1752 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v548*/, s33 offset:1756 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v549*/, s33 offset:1760 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v550*/, s33 offset:1764 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v551*/, s33 offset:1768 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v552*/, s33 offset:1772 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v553*/, s33 offset:1776
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v554*/, s33 offset:1780
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v555*/, s33 offset:1784
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v556*/, s33 offset:1788
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v557*/, s33 offset:1792
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v558*/, s33 offset:1796
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v559*/, s33 offset:1800
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v560*/, s33 offset:1804
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v561*/, s33 offset:1808
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v562*/, s33 offset:1812
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v563*/, s33 offset:1816
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v564*/, s33 offset:1820
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v565*/, s33 offset:1824
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v566*/, s33 offset:1828
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v567*/, s33 offset:1832
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v568*/, s33 offset:1836
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v569*/, s33 offset:1840
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v570*/, s33 offset:1844
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v571*/, s33 offset:1848
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v572*/, s33 offset:1852
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v573*/, s33 offset:1856
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v574*/, s33 offset:1860
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v575*/, s33 offset:1864
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v576*/, s33 offset:1868
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v577*/, s33 offset:1872
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v578*/, s33 offset:1876
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v579*/, s33 offset:1880
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v580*/, s33 offset:1884
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v581*/, s33 offset:1888
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v582*/, s33 offset:1892
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v583*/, s33 offset:1896
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v584*/, s33 offset:1900
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v585*/, s33 offset:1904
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v586*/, s33 offset:1908
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v587*/, s33 offset:1912
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v588*/, s33 offset:1916
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v589*/, s33 offset:1920
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v590*/, s33 offset:1924
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v591*/, s33 offset:1928
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v592*/, s33 offset:1932
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v593*/, s33 offset:1936
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v594*/, s33 offset:1940
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v595*/, s33 offset:1944
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v596*/, s33 offset:1948
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v597*/, s33 offset:1952
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v598*/, s33 offset:1956
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v599*/, s33 offset:1960
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v600*/, s33 offset:1964
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v601*/, s33 offset:1968
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v602*/, s33 offset:1972
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v603*/, s33 offset:1976
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v604*/, s33 offset:1980
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v605*/, s33 offset:1984
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v606*/, s33 offset:1988
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v607*/, s33 offset:1992
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v608*/, s33 offset:1996
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v609*/, s33 offset:2000
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v610*/, s33 offset:2004
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v611*/, s33 offset:2008
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v612*/, s33 offset:2012
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v613*/, s33 offset:2016
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v614*/, s33 offset:2020
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v615*/, s33 offset:2024
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v553*/, s33 offset:1776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v554*/, s33 offset:1780 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v555*/, s33 offset:1784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v556*/, s33 offset:1788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v557*/, s33 offset:1792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v558*/, s33 offset:1796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v559*/, s33 offset:1800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v560*/, s33 offset:1804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v561*/, s33 offset:1808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v562*/, s33 offset:1812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v563*/, s33 offset:1816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v564*/, s33 offset:1820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v565*/, s33 offset:1824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v566*/, s33 offset:1828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v567*/, s33 offset:1832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v568*/, s33 offset:1836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v569*/, s33 offset:1840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v570*/, s33 offset:1844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v571*/, s33 offset:1848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v572*/, s33 offset:1852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v573*/, s33 offset:1856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v574*/, s33 offset:1860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v575*/, s33 offset:1864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v576*/, s33 offset:1868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v577*/, s33 offset:1872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v578*/, s33 offset:1876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v579*/, s33 offset:1880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v580*/, s33 offset:1884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v581*/, s33 offset:1888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v582*/, s33 offset:1892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v583*/, s33 offset:1896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v584*/, s33 offset:1900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v585*/, s33 offset:1904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v586*/, s33 offset:1908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v587*/, s33 offset:1912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v588*/, s33 offset:1916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v589*/, s33 offset:1920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v590*/, s33 offset:1924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v591*/, s33 offset:1928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v592*/, s33 offset:1932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v593*/, s33 offset:1936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v594*/, s33 offset:1940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v595*/, s33 offset:1944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v596*/, s33 offset:1948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v597*/, s33 offset:1952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v598*/, s33 offset:1956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v599*/, s33 offset:1960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v600*/, s33 offset:1964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v601*/, s33 offset:1968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v602*/, s33 offset:1972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v603*/, s33 offset:1976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v604*/, s33 offset:1980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v605*/, s33 offset:1984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v606*/, s33 offset:1988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v607*/, s33 offset:1992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v608*/, s33 offset:1996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v609*/, s33 offset:2000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v610*/, s33 offset:2004 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v611*/, s33 offset:2008 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v612*/, s33 offset:2012 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v613*/, s33 offset:2016 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v614*/, s33 offset:2020 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v615*/, s33 offset:2024 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v616*/, s33 offset:2028
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v617*/, s33 offset:2032
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v618*/, s33 offset:2036
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v619*/, s33 offset:2040
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v620*/, s33 offset:2044
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v621*/, s33 offset:2048
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v622*/, s33 offset:2052
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v623*/, s33 offset:2056
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v624*/, s33 offset:2060
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v625*/, s33 offset:2064
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v626*/, s33 offset:2068
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v627*/, s33 offset:2072
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v628*/, s33 offset:2076
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v629*/, s33 offset:2080
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v630*/, s33 offset:2084
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v631*/, s33 offset:2088
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v632*/, s33 offset:2092
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v633*/, s33 offset:2096
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v634*/, s33 offset:2100
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v635*/, s33 offset:2104
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v636*/, s33 offset:2108
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v637*/, s33 offset:2112
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v638*/, s33 offset:2116
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v639*/, s33 offset:2120
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v640*/, s33 offset:2124
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v641*/, s33 offset:2128
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v642*/, s33 offset:2132
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v643*/, s33 offset:2136
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v644*/, s33 offset:2140
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v645*/, s33 offset:2144
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v646*/, s33 offset:2148
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v647*/, s33 offset:2152
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v648*/, s33 offset:2156
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v649*/, s33 offset:2160
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v650*/, s33 offset:2164
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v651*/, s33 offset:2168
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v652*/, s33 offset:2172
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v653*/, s33 offset:2176
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v654*/, s33 offset:2180
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v655*/, s33 offset:2184
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v656*/, s33 offset:2188
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v657*/, s33 offset:2192
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v658*/, s33 offset:2196
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v659*/, s33 offset:2200
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v660*/, s33 offset:2204
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v661*/, s33 offset:2208
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v662*/, s33 offset:2212
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v663*/, s33 offset:2216
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v664*/, s33 offset:2220
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v665*/, s33 offset:2224
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v666*/, s33 offset:2228
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v667*/, s33 offset:2232
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v668*/, s33 offset:2236
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v669*/, s33 offset:2240
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v670*/, s33 offset:2244
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v671*/, s33 offset:2248
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v672*/, s33 offset:2252
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v673*/, s33 offset:2256
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v674*/, s33 offset:2260
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v675*/, s33 offset:2264
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v676*/, s33 offset:2268
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v677*/, s33 offset:2272
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v678*/, s33 offset:2276
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v616*/, s33 offset:2028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v617*/, s33 offset:2032 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v618*/, s33 offset:2036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v619*/, s33 offset:2040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v620*/, s33 offset:2044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v621*/, s33 offset:2048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v622*/, s33 offset:2052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v623*/, s33 offset:2056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v624*/, s33 offset:2060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v625*/, s33 offset:2064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v626*/, s33 offset:2068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v627*/, s33 offset:2072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v628*/, s33 offset:2076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v629*/, s33 offset:2080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v630*/, s33 offset:2084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v631*/, s33 offset:2088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v632*/, s33 offset:2092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v633*/, s33 offset:2096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v634*/, s33 offset:2100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v635*/, s33 offset:2104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v636*/, s33 offset:2108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v637*/, s33 offset:2112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v638*/, s33 offset:2116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v639*/, s33 offset:2120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v640*/, s33 offset:2124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v641*/, s33 offset:2128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v642*/, s33 offset:2132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v643*/, s33 offset:2136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v644*/, s33 offset:2140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v645*/, s33 offset:2144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v646*/, s33 offset:2148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v647*/, s33 offset:2152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v648*/, s33 offset:2156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v649*/, s33 offset:2160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v650*/, s33 offset:2164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v651*/, s33 offset:2168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v652*/, s33 offset:2172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v653*/, s33 offset:2176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v654*/, s33 offset:2180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v655*/, s33 offset:2184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v656*/, s33 offset:2188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v657*/, s33 offset:2192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v658*/, s33 offset:2196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v659*/, s33 offset:2200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v660*/, s33 offset:2204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v661*/, s33 offset:2208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v662*/, s33 offset:2212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v663*/, s33 offset:2216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v664*/, s33 offset:2220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v665*/, s33 offset:2224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v666*/, s33 offset:2228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v667*/, s33 offset:2232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v668*/, s33 offset:2236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v669*/, s33 offset:2240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v670*/, s33 offset:2244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v671*/, s33 offset:2248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v672*/, s33 offset:2252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v673*/, s33 offset:2256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v674*/, s33 offset:2260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v675*/, s33 offset:2264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v676*/, s33 offset:2268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v677*/, s33 offset:2272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v678*/, s33 offset:2276 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v679*/, s33 offset:2280
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v680*/, s33 offset:2284
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v681*/, s33 offset:2288
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v682*/, s33 offset:2292
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v683*/, s33 offset:2296
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v684*/, s33 offset:2300
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v685*/, s33 offset:2304
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v686*/, s33 offset:2308
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v687*/, s33 offset:2312
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v688*/, s33 offset:2316
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v689*/, s33 offset:2320
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v690*/, s33 offset:2324
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v691*/, s33 offset:2328
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v692*/, s33 offset:2332
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v693*/, s33 offset:2336
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v694*/, s33 offset:2340
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v695*/, s33 offset:2344
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v696*/, s33 offset:2348
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v697*/, s33 offset:2352
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v698*/, s33 offset:2356
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v699*/, s33 offset:2360
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v700*/, s33 offset:2364
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v701*/, s33 offset:2368
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v702*/, s33 offset:2372
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v703*/, s33 offset:2376
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v704*/, s33 offset:2380
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v705*/, s33 offset:2384
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v706*/, s33 offset:2388
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v707*/, s33 offset:2392
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v708*/, s33 offset:2396
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v709*/, s33 offset:2400
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v710*/, s33 offset:2404
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v711*/, s33 offset:2408
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v712*/, s33 offset:2412
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v713*/, s33 offset:2416
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v714*/, s33 offset:2420
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v715*/, s33 offset:2424
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v716*/, s33 offset:2428
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v717*/, s33 offset:2432
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v718*/, s33 offset:2436
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v719*/, s33 offset:2440
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v720*/, s33 offset:2444
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v721*/, s33 offset:2448
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v722*/, s33 offset:2452
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v723*/, s33 offset:2456
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v724*/, s33 offset:2460
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v725*/, s33 offset:2464
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v726*/, s33 offset:2468
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v727*/, s33 offset:2472
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v728*/, s33 offset:2476
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v729*/, s33 offset:2480
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v730*/, s33 offset:2484
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v731*/, s33 offset:2488
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v732*/, s33 offset:2492
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v733*/, s33 offset:2496
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v734*/, s33 offset:2500
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v735*/, s33 offset:2504
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v736*/, s33 offset:2508
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v737*/, s33 offset:2512
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v738*/, s33 offset:2516
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v739*/, s33 offset:2520
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v740*/, s33 offset:2524
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v741*/, s33 offset:2528
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v679*/, s33 offset:2280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v680*/, s33 offset:2284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v681*/, s33 offset:2288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v682*/, s33 offset:2292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v683*/, s33 offset:2296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v684*/, s33 offset:2300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v685*/, s33 offset:2304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v686*/, s33 offset:2308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v687*/, s33 offset:2312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v688*/, s33 offset:2316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v689*/, s33 offset:2320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v690*/, s33 offset:2324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v691*/, s33 offset:2328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v692*/, s33 offset:2332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v693*/, s33 offset:2336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v694*/, s33 offset:2340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v695*/, s33 offset:2344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v696*/, s33 offset:2348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v697*/, s33 offset:2352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v698*/, s33 offset:2356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v699*/, s33 offset:2360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v700*/, s33 offset:2364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v701*/, s33 offset:2368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v702*/, s33 offset:2372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v703*/, s33 offset:2376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v704*/, s33 offset:2380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v705*/, s33 offset:2384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v706*/, s33 offset:2388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v707*/, s33 offset:2392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v708*/, s33 offset:2396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v709*/, s33 offset:2400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v710*/, s33 offset:2404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v711*/, s33 offset:2408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v712*/, s33 offset:2412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v713*/, s33 offset:2416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v714*/, s33 offset:2420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v715*/, s33 offset:2424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v716*/, s33 offset:2428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v717*/, s33 offset:2432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v718*/, s33 offset:2436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v719*/, s33 offset:2440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v720*/, s33 offset:2444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v721*/, s33 offset:2448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v722*/, s33 offset:2452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v723*/, s33 offset:2456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v724*/, s33 offset:2460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v725*/, s33 offset:2464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v726*/, s33 offset:2468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v727*/, s33 offset:2472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v728*/, s33 offset:2476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v729*/, s33 offset:2480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v730*/, s33 offset:2484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v731*/, s33 offset:2488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v732*/, s33 offset:2492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v733*/, s33 offset:2496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v734*/, s33 offset:2500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v735*/, s33 offset:2504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v736*/, s33 offset:2508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v737*/, s33 offset:2512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v738*/, s33 offset:2516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v739*/, s33 offset:2520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v740*/, s33 offset:2524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v741*/, s33 offset:2528 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v742*/, s33 offset:2532
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v743*/, s33 offset:2536
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v744*/, s33 offset:2540
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v745*/, s33 offset:2544
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v746*/, s33 offset:2548
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v747*/, s33 offset:2552
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v748*/, s33 offset:2556
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v749*/, s33 offset:2560
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v750*/, s33 offset:2564
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v751*/, s33 offset:2568
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v752*/, s33 offset:2572
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v753*/, s33 offset:2576
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v754*/, s33 offset:2580
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v755*/, s33 offset:2584
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v756*/, s33 offset:2588
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v757*/, s33 offset:2592
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v758*/, s33 offset:2596
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v759*/, s33 offset:2600
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v760*/, s33 offset:2604
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v761*/, s33 offset:2608
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v762*/, s33 offset:2612
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v763*/, s33 offset:2616
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v764*/, s33 offset:2620
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v765*/, s33 offset:2624
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v766*/, s33 offset:2628
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v767*/, s33 offset:2632
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v742*/, s33 offset:2532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v743*/, s33 offset:2536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v744*/, s33 offset:2540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v745*/, s33 offset:2544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v746*/, s33 offset:2548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v747*/, s33 offset:2552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v748*/, s33 offset:2556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v749*/, s33 offset:2560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v750*/, s33 offset:2564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v751*/, s33 offset:2568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v752*/, s33 offset:2572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v753*/, s33 offset:2576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v754*/, s33 offset:2580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v755*/, s33 offset:2584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v756*/, s33 offset:2588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v757*/, s33 offset:2592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v758*/, s33 offset:2596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v759*/, s33 offset:2600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v760*/, s33 offset:2604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v761*/, s33 offset:2608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v762*/, s33 offset:2612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v763*/, s33 offset:2616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v764*/, s33 offset:2620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v765*/, s33 offset:2624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v766*/, s33 offset:2628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v767*/, s33 offset:2632 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x80c ; msbs: dst=0 src0=0 src1=3 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v768*/, s33 offset:2636
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v769*/, s33 offset:2640
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v770*/, s33 offset:2644
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v771*/, s33 offset:2648
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v772*/, s33 offset:2652
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v773*/, s33 offset:2656
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v774*/, s33 offset:2660
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v775*/, s33 offset:2664
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v776*/, s33 offset:2668
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v777*/, s33 offset:2672
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v778*/, s33 offset:2676
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v779*/, s33 offset:2680
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v780*/, s33 offset:2684
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v781*/, s33 offset:2688
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v782*/, s33 offset:2692
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v783*/, s33 offset:2696
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v784*/, s33 offset:2700
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v785*/, s33 offset:2704
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v786*/, s33 offset:2708
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v787*/, s33 offset:2712
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v788*/, s33 offset:2716
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v789*/, s33 offset:2720
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v790*/, s33 offset:2724
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v791*/, s33 offset:2728
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v792*/, s33 offset:2732
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v793*/, s33 offset:2736
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v794*/, s33 offset:2740
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v795*/, s33 offset:2744
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v796*/, s33 offset:2748
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v797*/, s33 offset:2752
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v798*/, s33 offset:2756
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v799*/, s33 offset:2760
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v800*/, s33 offset:2764
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v801*/, s33 offset:2768
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v802*/, s33 offset:2772
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v803*/, s33 offset:2776
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v804*/, s33 offset:2780
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v0 /*v768*/, s33 offset:2636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v1 /*v769*/, s33 offset:2640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v2 /*v770*/, s33 offset:2644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v3 /*v771*/, s33 offset:2648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v4 /*v772*/, s33 offset:2652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v5 /*v773*/, s33 offset:2656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v6 /*v774*/, s33 offset:2660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v7 /*v775*/, s33 offset:2664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v8 /*v776*/, s33 offset:2668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v9 /*v777*/, s33 offset:2672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v10 /*v778*/, s33 offset:2676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v11 /*v779*/, s33 offset:2680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v12 /*v780*/, s33 offset:2684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v13 /*v781*/, s33 offset:2688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v14 /*v782*/, s33 offset:2692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v15 /*v783*/, s33 offset:2696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v16 /*v784*/, s33 offset:2700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v17 /*v785*/, s33 offset:2704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v18 /*v786*/, s33 offset:2708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v19 /*v787*/, s33 offset:2712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v20 /*v788*/, s33 offset:2716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v21 /*v789*/, s33 offset:2720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v22 /*v790*/, s33 offset:2724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v23 /*v791*/, s33 offset:2728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v24 /*v792*/, s33 offset:2732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v25 /*v793*/, s33 offset:2736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v26 /*v794*/, s33 offset:2740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v27 /*v795*/, s33 offset:2744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v28 /*v796*/, s33 offset:2748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v29 /*v797*/, s33 offset:2752 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v30 /*v798*/, s33 offset:2756 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v31 /*v799*/, s33 offset:2760 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v32 /*v800*/, s33 offset:2764 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v33 /*v801*/, s33 offset:2768 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v34 /*v802*/, s33 offset:2772 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v35 /*v803*/, s33 offset:2776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v36 /*v804*/, s33 offset:2780 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v805*/, s33 offset:2784
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v806*/, s33 offset:2788
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v807*/, s33 offset:2792
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v808*/, s33 offset:2796
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v809*/, s33 offset:2800
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v810*/, s33 offset:2804
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v811*/, s33 offset:2808
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v812*/, s33 offset:2812
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v813*/, s33 offset:2816
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v814*/, s33 offset:2820
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v815*/, s33 offset:2824
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v816*/, s33 offset:2828
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v817*/, s33 offset:2832
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v818*/, s33 offset:2836
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v819*/, s33 offset:2840
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v820*/, s33 offset:2844
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v821*/, s33 offset:2848
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v822*/, s33 offset:2852
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v823*/, s33 offset:2856
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v824*/, s33 offset:2860
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v825*/, s33 offset:2864
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v826*/, s33 offset:2868
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v827*/, s33 offset:2872
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v828*/, s33 offset:2876
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v829*/, s33 offset:2880
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v830*/, s33 offset:2884
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v831*/, s33 offset:2888
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v832*/, s33 offset:2892
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v833*/, s33 offset:2896
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v834*/, s33 offset:2900
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v835*/, s33 offset:2904
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v836*/, s33 offset:2908
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v837*/, s33 offset:2912
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v838*/, s33 offset:2916
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v839*/, s33 offset:2920
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v840*/, s33 offset:2924
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v841*/, s33 offset:2928
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v842*/, s33 offset:2932
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v843*/, s33 offset:2936
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v844*/, s33 offset:2940
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v845*/, s33 offset:2944
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v846*/, s33 offset:2948
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v847*/, s33 offset:2952
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v848*/, s33 offset:2956
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v849*/, s33 offset:2960
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v850*/, s33 offset:2964
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v851*/, s33 offset:2968
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v852*/, s33 offset:2972
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v853*/, s33 offset:2976
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v854*/, s33 offset:2980
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v855*/, s33 offset:2984
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v856*/, s33 offset:2988
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v857*/, s33 offset:2992
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v858*/, s33 offset:2996
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v859*/, s33 offset:3000
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v860*/, s33 offset:3004
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v861*/, s33 offset:3008
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v862*/, s33 offset:3012
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v863*/, s33 offset:3016
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v864*/, s33 offset:3020
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v865*/, s33 offset:3024
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v866*/, s33 offset:3028
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v867*/, s33 offset:3032
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v37 /*v805*/, s33 offset:2784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v38 /*v806*/, s33 offset:2788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v39 /*v807*/, s33 offset:2792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40 /*v808*/, s33 offset:2796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41 /*v809*/, s33 offset:2800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42 /*v810*/, s33 offset:2804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v43 /*v811*/, s33 offset:2808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v44 /*v812*/, s33 offset:2812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v45 /*v813*/, s33 offset:2816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v46 /*v814*/, s33 offset:2820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v47 /*v815*/, s33 offset:2824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v48 /*v816*/, s33 offset:2828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v49 /*v817*/, s33 offset:2832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v50 /*v818*/, s33 offset:2836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v51 /*v819*/, s33 offset:2840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v52 /*v820*/, s33 offset:2844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v53 /*v821*/, s33 offset:2848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v54 /*v822*/, s33 offset:2852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v55 /*v823*/, s33 offset:2856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v56 /*v824*/, s33 offset:2860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v57 /*v825*/, s33 offset:2864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v58 /*v826*/, s33 offset:2868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v59 /*v827*/, s33 offset:2872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v60 /*v828*/, s33 offset:2876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v61 /*v829*/, s33 offset:2880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v62 /*v830*/, s33 offset:2884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v63 /*v831*/, s33 offset:2888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v64 /*v832*/, s33 offset:2892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v65 /*v833*/, s33 offset:2896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v66 /*v834*/, s33 offset:2900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v67 /*v835*/, s33 offset:2904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v68 /*v836*/, s33 offset:2908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v69 /*v837*/, s33 offset:2912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v70 /*v838*/, s33 offset:2916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v71 /*v839*/, s33 offset:2920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v72 /*v840*/, s33 offset:2924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v73 /*v841*/, s33 offset:2928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v74 /*v842*/, s33 offset:2932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v75 /*v843*/, s33 offset:2936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v76 /*v844*/, s33 offset:2940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v77 /*v845*/, s33 offset:2944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v78 /*v846*/, s33 offset:2948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v79 /*v847*/, s33 offset:2952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v80 /*v848*/, s33 offset:2956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v81 /*v849*/, s33 offset:2960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v82 /*v850*/, s33 offset:2964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v83 /*v851*/, s33 offset:2968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v84 /*v852*/, s33 offset:2972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v85 /*v853*/, s33 offset:2976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v86 /*v854*/, s33 offset:2980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v87 /*v855*/, s33 offset:2984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v88 /*v856*/, s33 offset:2988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v89 /*v857*/, s33 offset:2992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v90 /*v858*/, s33 offset:2996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v91 /*v859*/, s33 offset:3000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v92 /*v860*/, s33 offset:3004 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v93 /*v861*/, s33 offset:3008 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v94 /*v862*/, s33 offset:3012 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v95 /*v863*/, s33 offset:3016 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v96 /*v864*/, s33 offset:3020 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v97 /*v865*/, s33 offset:3024 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v98 /*v866*/, s33 offset:3028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v99 /*v867*/, s33 offset:3032 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v868*/, s33 offset:3036
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v869*/, s33 offset:3040
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v870*/, s33 offset:3044
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v871*/, s33 offset:3048
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v872*/, s33 offset:3052
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v873*/, s33 offset:3056
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v874*/, s33 offset:3060
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v875*/, s33 offset:3064
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v876*/, s33 offset:3068
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v877*/, s33 offset:3072
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v878*/, s33 offset:3076
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v879*/, s33 offset:3080
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v880*/, s33 offset:3084
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v881*/, s33 offset:3088
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v882*/, s33 offset:3092
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v883*/, s33 offset:3096
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v884*/, s33 offset:3100
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v885*/, s33 offset:3104
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v886*/, s33 offset:3108
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v887*/, s33 offset:3112
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v888*/, s33 offset:3116
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v889*/, s33 offset:3120
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v890*/, s33 offset:3124
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v891*/, s33 offset:3128
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v892*/, s33 offset:3132
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v893*/, s33 offset:3136
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v894*/, s33 offset:3140
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v895*/, s33 offset:3144
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v896*/, s33 offset:3148
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v897*/, s33 offset:3152
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v898*/, s33 offset:3156
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v899*/, s33 offset:3160
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v900*/, s33 offset:3164
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v901*/, s33 offset:3168
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v902*/, s33 offset:3172
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v903*/, s33 offset:3176
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v904*/, s33 offset:3180
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v905*/, s33 offset:3184
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v906*/, s33 offset:3188
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v907*/, s33 offset:3192
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v908*/, s33 offset:3196
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v909*/, s33 offset:3200
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v910*/, s33 offset:3204
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v911*/, s33 offset:3208
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v912*/, s33 offset:3212
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v913*/, s33 offset:3216
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v914*/, s33 offset:3220
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v915*/, s33 offset:3224
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v916*/, s33 offset:3228
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v917*/, s33 offset:3232
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v918*/, s33 offset:3236
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v919*/, s33 offset:3240
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v920*/, s33 offset:3244
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v921*/, s33 offset:3248
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v922*/, s33 offset:3252
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v923*/, s33 offset:3256
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v924*/, s33 offset:3260
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v925*/, s33 offset:3264
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v926*/, s33 offset:3268
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v927*/, s33 offset:3272
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v928*/, s33 offset:3276
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v929*/, s33 offset:3280
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v930*/, s33 offset:3284
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v100 /*v868*/, s33 offset:3036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v101 /*v869*/, s33 offset:3040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v102 /*v870*/, s33 offset:3044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v103 /*v871*/, s33 offset:3048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v104 /*v872*/, s33 offset:3052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v105 /*v873*/, s33 offset:3056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v106 /*v874*/, s33 offset:3060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v107 /*v875*/, s33 offset:3064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v108 /*v876*/, s33 offset:3068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v109 /*v877*/, s33 offset:3072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v110 /*v878*/, s33 offset:3076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v111 /*v879*/, s33 offset:3080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v112 /*v880*/, s33 offset:3084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v113 /*v881*/, s33 offset:3088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v114 /*v882*/, s33 offset:3092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v115 /*v883*/, s33 offset:3096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v116 /*v884*/, s33 offset:3100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v117 /*v885*/, s33 offset:3104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v118 /*v886*/, s33 offset:3108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v119 /*v887*/, s33 offset:3112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v120 /*v888*/, s33 offset:3116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v121 /*v889*/, s33 offset:3120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v122 /*v890*/, s33 offset:3124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v123 /*v891*/, s33 offset:3128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v124 /*v892*/, s33 offset:3132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v125 /*v893*/, s33 offset:3136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v126 /*v894*/, s33 offset:3140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v127 /*v895*/, s33 offset:3144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v128 /*v896*/, s33 offset:3148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v129 /*v897*/, s33 offset:3152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v130 /*v898*/, s33 offset:3156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v131 /*v899*/, s33 offset:3160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v132 /*v900*/, s33 offset:3164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v133 /*v901*/, s33 offset:3168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v134 /*v902*/, s33 offset:3172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v135 /*v903*/, s33 offset:3176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v136 /*v904*/, s33 offset:3180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v137 /*v905*/, s33 offset:3184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v138 /*v906*/, s33 offset:3188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v139 /*v907*/, s33 offset:3192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v140 /*v908*/, s33 offset:3196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v141 /*v909*/, s33 offset:3200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v142 /*v910*/, s33 offset:3204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v143 /*v911*/, s33 offset:3208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v144 /*v912*/, s33 offset:3212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v145 /*v913*/, s33 offset:3216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v146 /*v914*/, s33 offset:3220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v147 /*v915*/, s33 offset:3224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v148 /*v916*/, s33 offset:3228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v149 /*v917*/, s33 offset:3232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v150 /*v918*/, s33 offset:3236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v151 /*v919*/, s33 offset:3240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v152 /*v920*/, s33 offset:3244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v153 /*v921*/, s33 offset:3248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v154 /*v922*/, s33 offset:3252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v155 /*v923*/, s33 offset:3256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v156 /*v924*/, s33 offset:3260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v157 /*v925*/, s33 offset:3264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v158 /*v926*/, s33 offset:3268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v159 /*v927*/, s33 offset:3272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v160 /*v928*/, s33 offset:3276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v161 /*v929*/, s33 offset:3280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v162 /*v930*/, s33 offset:3284 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v931*/, s33 offset:3288
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v932*/, s33 offset:3292
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v933*/, s33 offset:3296
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v934*/, s33 offset:3300
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v935*/, s33 offset:3304
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v936*/, s33 offset:3308
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v937*/, s33 offset:3312
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v938*/, s33 offset:3316
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v939*/, s33 offset:3320
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v940*/, s33 offset:3324
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v941*/, s33 offset:3328
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v942*/, s33 offset:3332
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v943*/, s33 offset:3336
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v944*/, s33 offset:3340
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v945*/, s33 offset:3344
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v946*/, s33 offset:3348
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v947*/, s33 offset:3352
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v948*/, s33 offset:3356
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v949*/, s33 offset:3360
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v950*/, s33 offset:3364
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v951*/, s33 offset:3368
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v952*/, s33 offset:3372
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v953*/, s33 offset:3376
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v954*/, s33 offset:3380
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v955*/, s33 offset:3384
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v956*/, s33 offset:3388
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v957*/, s33 offset:3392
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v958*/, s33 offset:3396
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v959*/, s33 offset:3400
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v960*/, s33 offset:3404
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v961*/, s33 offset:3408
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v962*/, s33 offset:3412
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v963*/, s33 offset:3416
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v964*/, s33 offset:3420
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v965*/, s33 offset:3424
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v966*/, s33 offset:3428
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v967*/, s33 offset:3432
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v968*/, s33 offset:3436
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v969*/, s33 offset:3440
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v970*/, s33 offset:3444
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v971*/, s33 offset:3448
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v972*/, s33 offset:3452
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v973*/, s33 offset:3456
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v974*/, s33 offset:3460
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v975*/, s33 offset:3464
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v976*/, s33 offset:3468
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v977*/, s33 offset:3472
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v978*/, s33 offset:3476
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v979*/, s33 offset:3480
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v980*/, s33 offset:3484
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v981*/, s33 offset:3488
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v982*/, s33 offset:3492
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v983*/, s33 offset:3496
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v984*/, s33 offset:3500
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v985*/, s33 offset:3504
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v986*/, s33 offset:3508
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v987*/, s33 offset:3512
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v988*/, s33 offset:3516
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v989*/, s33 offset:3520
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v990*/, s33 offset:3524
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v991*/, s33 offset:3528
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v992*/, s33 offset:3532
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v993*/, s33 offset:3536
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v163 /*v931*/, s33 offset:3288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v164 /*v932*/, s33 offset:3292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v165 /*v933*/, s33 offset:3296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v166 /*v934*/, s33 offset:3300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v167 /*v935*/, s33 offset:3304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v168 /*v936*/, s33 offset:3308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v169 /*v937*/, s33 offset:3312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v170 /*v938*/, s33 offset:3316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v171 /*v939*/, s33 offset:3320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v172 /*v940*/, s33 offset:3324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v173 /*v941*/, s33 offset:3328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v174 /*v942*/, s33 offset:3332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v175 /*v943*/, s33 offset:3336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v176 /*v944*/, s33 offset:3340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v177 /*v945*/, s33 offset:3344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v178 /*v946*/, s33 offset:3348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v179 /*v947*/, s33 offset:3352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v180 /*v948*/, s33 offset:3356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v181 /*v949*/, s33 offset:3360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v182 /*v950*/, s33 offset:3364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v183 /*v951*/, s33 offset:3368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v184 /*v952*/, s33 offset:3372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v185 /*v953*/, s33 offset:3376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v186 /*v954*/, s33 offset:3380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v187 /*v955*/, s33 offset:3384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v188 /*v956*/, s33 offset:3388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v189 /*v957*/, s33 offset:3392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v190 /*v958*/, s33 offset:3396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v191 /*v959*/, s33 offset:3400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v192 /*v960*/, s33 offset:3404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v193 /*v961*/, s33 offset:3408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v194 /*v962*/, s33 offset:3412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v195 /*v963*/, s33 offset:3416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v196 /*v964*/, s33 offset:3420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v197 /*v965*/, s33 offset:3424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v198 /*v966*/, s33 offset:3428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v199 /*v967*/, s33 offset:3432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v200 /*v968*/, s33 offset:3436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v201 /*v969*/, s33 offset:3440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v202 /*v970*/, s33 offset:3444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v203 /*v971*/, s33 offset:3448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v204 /*v972*/, s33 offset:3452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v205 /*v973*/, s33 offset:3456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v206 /*v974*/, s33 offset:3460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v207 /*v975*/, s33 offset:3464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v208 /*v976*/, s33 offset:3468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v209 /*v977*/, s33 offset:3472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v210 /*v978*/, s33 offset:3476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v211 /*v979*/, s33 offset:3480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v212 /*v980*/, s33 offset:3484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v213 /*v981*/, s33 offset:3488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v214 /*v982*/, s33 offset:3492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v215 /*v983*/, s33 offset:3496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v216 /*v984*/, s33 offset:3500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v217 /*v985*/, s33 offset:3504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v218 /*v986*/, s33 offset:3508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v219 /*v987*/, s33 offset:3512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v220 /*v988*/, s33 offset:3516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v221 /*v989*/, s33 offset:3520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v222 /*v990*/, s33 offset:3524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v223 /*v991*/, s33 offset:3528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v224 /*v992*/, s33 offset:3532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v225 /*v993*/, s33 offset:3536 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x1d ; 120-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v994*/, s33 offset:3540
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v995*/, s33 offset:3544
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v996*/, s33 offset:3548
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v997*/, s33 offset:3552
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v998*/, s33 offset:3556
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v999*/, s33 offset:3560
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v1000*/, s33 offset:3564
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v1001*/, s33 offset:3568
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v1002*/, s33 offset:3572
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v1003*/, s33 offset:3576
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v1004*/, s33 offset:3580
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v1005*/, s33 offset:3584
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v1006*/, s33 offset:3588
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v1007*/, s33 offset:3592
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v1008*/, s33 offset:3596
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v1009*/, s33 offset:3600
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v1010*/, s33 offset:3604
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v1011*/, s33 offset:3608
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v1012*/, s33 offset:3612
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v1013*/, s33 offset:3616
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v1014*/, s33 offset:3620
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v1015*/, s33 offset:3624
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v1016*/, s33 offset:3628
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v1017*/, s33 offset:3632
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v1018*/, s33 offset:3636
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v1019*/, s33 offset:3640
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v1020*/, s33 offset:3644
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v1021*/, s33 offset:3648
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v1022*/, s33 offset:3652
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v1023*/, s33 offset:3656
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v226 /*v994*/, s33 offset:3540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v227 /*v995*/, s33 offset:3544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v228 /*v996*/, s33 offset:3548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v229 /*v997*/, s33 offset:3552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v230 /*v998*/, s33 offset:3556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v231 /*v999*/, s33 offset:3560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v232 /*v1000*/, s33 offset:3564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v233 /*v1001*/, s33 offset:3568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v234 /*v1002*/, s33 offset:3572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v235 /*v1003*/, s33 offset:3576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v236 /*v1004*/, s33 offset:3580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v237 /*v1005*/, s33 offset:3584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v238 /*v1006*/, s33 offset:3588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v239 /*v1007*/, s33 offset:3592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v240 /*v1008*/, s33 offset:3596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v241 /*v1009*/, s33 offset:3600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v242 /*v1010*/, s33 offset:3604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v243 /*v1011*/, s33 offset:3608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v244 /*v1012*/, s33 offset:3612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v245 /*v1013*/, s33 offset:3616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v246 /*v1014*/, s33 offset:3620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v247 /*v1015*/, s33 offset:3624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v248 /*v1016*/, s33 offset:3628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v249 /*v1017*/, s33 offset:3632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v250 /*v1018*/, s33 offset:3636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v251 /*v1019*/, s33 offset:3640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v252 /*v1020*/, s33 offset:3644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v253 /*v1021*/, s33 offset:3648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v254 /*v1022*/, s33 offset:3652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v255 /*v1023*/, s33 offset:3656 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, -1
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0xc00 ; msbs: dst=0 src0=0 src1=0 src2=0
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x2 ; 12-byte Folded Spill
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42, s33
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40, s33 offset:164
-; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41, s33 offset:168
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v42, s33 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v40, s33 offset:164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_store_b32 off, v41, s33 offset:168 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x2
 ; GFX1250-DAGISEL-NEXT:    v_writelane_b32 v42, s0, 3
 ; GFX1250-DAGISEL-NEXT:    s_mov_b64 s[0:1], callee@abs64
@@ -10203,942 +10203,942 @@ define amdgpu_gfx_whole_wave void @call_from_whole_wave(i1 %unused, <8 x float>
 ; GFX1250-DAGISEL-NEXT:    v_readlane_b32 s4, v42, 0
 ; GFX1250-DAGISEL-NEXT:    v_readlane_b32 s0, v42, 3
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x2 ; 12-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42, off, s33
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40, off, s33 offset:164
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41, off, s33 offset:168
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42, off, s33 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40, off, s33 offset:164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41, off, s33 offset:168 nv
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 s32, s33
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_xor_b32 exec_lo, s4, -1
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s33 offset:4
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s33 offset:8
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2, off, s33 offset:12
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3, off, s33 offset:16
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4, off, s33 offset:20
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5, off, s33 offset:24
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6, off, s33 offset:28
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7, off, s33 offset:32
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8, off, s33 offset:36
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9, off, s33 offset:40
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10, off, s33 offset:44
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11, off, s33 offset:48
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12, off, s33 offset:52
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13, off, s33 offset:56
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14, off, s33 offset:60
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15, off, s33 offset:64
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16, off, s33 offset:68
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17, off, s33 offset:72
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18, off, s33 offset:76
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19, off, s33 offset:80
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20, off, s33 offset:84
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21, off, s33 offset:88
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22, off, s33 offset:92
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23, off, s33 offset:96
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24, off, s33 offset:100
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25, off, s33 offset:104
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26, off, s33 offset:108
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27, off, s33 offset:112
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28, off, s33 offset:116
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29, off, s33 offset:120
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30, off, s33 offset:124
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31, off, s33 offset:128
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32, off, s33 offset:132
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33, off, s33 offset:136
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34, off, s33 offset:140
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35, off, s33 offset:144
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36, off, s33 offset:148
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37, off, s33 offset:152
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38, off, s33 offset:156
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39, off, s33 offset:160
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48, off, s33 offset:172
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49, off, s33 offset:176
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50, off, s33 offset:180
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51, off, s33 offset:184
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52, off, s33 offset:188
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53, off, s33 offset:192
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54, off, s33 offset:196
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55, off, s33 offset:200
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64, off, s33 offset:204
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65, off, s33 offset:208
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66, off, s33 offset:212
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67, off, s33 offset:216
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68, off, s33 offset:220
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69, off, s33 offset:224
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70, off, s33 offset:228
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71, off, s33 offset:232
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80, off, s33 offset:236
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81, off, s33 offset:240
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82, off, s33 offset:244
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83, off, s33 offset:248
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84, off, s33 offset:252
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85, off, s33 offset:256
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86, off, s33 offset:260
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0, off, s33 offset:4 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1, off, s33 offset:8 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2, off, s33 offset:12 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3, off, s33 offset:16 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4, off, s33 offset:20 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5, off, s33 offset:24 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6, off, s33 offset:28 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7, off, s33 offset:32 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8, off, s33 offset:36 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9, off, s33 offset:40 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10, off, s33 offset:44 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11, off, s33 offset:48 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12, off, s33 offset:52 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13, off, s33 offset:56 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14, off, s33 offset:60 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15, off, s33 offset:64 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16, off, s33 offset:68 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17, off, s33 offset:72 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18, off, s33 offset:76 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19, off, s33 offset:80 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20, off, s33 offset:84 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21, off, s33 offset:88 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22, off, s33 offset:92 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23, off, s33 offset:96 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24, off, s33 offset:100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25, off, s33 offset:104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26, off, s33 offset:108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27, off, s33 offset:112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28, off, s33 offset:116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29, off, s33 offset:120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30, off, s33 offset:124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31, off, s33 offset:128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32, off, s33 offset:132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33, off, s33 offset:136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34, off, s33 offset:140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35, off, s33 offset:144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36, off, s33 offset:148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37, off, s33 offset:152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38, off, s33 offset:156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39, off, s33 offset:160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48, off, s33 offset:172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49, off, s33 offset:176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50, off, s33 offset:180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51, off, s33 offset:184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52, off, s33 offset:188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53, off, s33 offset:192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54, off, s33 offset:196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55, off, s33 offset:200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64, off, s33 offset:204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65, off, s33 offset:208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66, off, s33 offset:212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67, off, s33 offset:216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68, off, s33 offset:220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69, off, s33 offset:224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70, off, s33 offset:228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71, off, s33 offset:232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80, off, s33 offset:236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81, off, s33 offset:240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82, off, s33 offset:244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83, off, s33 offset:248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84, off, s33 offset:252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85, off, s33 offset:256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86, off, s33 offset:260 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87, off, s33 offset:264
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96, off, s33 offset:268
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97, off, s33 offset:272
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98, off, s33 offset:276
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99, off, s33 offset:280
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100, off, s33 offset:284
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101, off, s33 offset:288
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102, off, s33 offset:292
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103, off, s33 offset:296
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112, off, s33 offset:300
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113, off, s33 offset:304
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114, off, s33 offset:308
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115, off, s33 offset:312
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116, off, s33 offset:316
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117, off, s33 offset:320
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118, off, s33 offset:324
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119, off, s33 offset:328
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128, off, s33 offset:332
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129, off, s33 offset:336
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130, off, s33 offset:340
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131, off, s33 offset:344
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132, off, s33 offset:348
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133, off, s33 offset:352
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134, off, s33 offset:356
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135, off, s33 offset:360
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144, off, s33 offset:364
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145, off, s33 offset:368
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146, off, s33 offset:372
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147, off, s33 offset:376
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148, off, s33 offset:380
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149, off, s33 offset:384
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150, off, s33 offset:388
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151, off, s33 offset:392
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160, off, s33 offset:396
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161, off, s33 offset:400
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162, off, s33 offset:404
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163, off, s33 offset:408
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164, off, s33 offset:412
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165, off, s33 offset:416
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166, off, s33 offset:420
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167, off, s33 offset:424
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176, off, s33 offset:428
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177, off, s33 offset:432
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178, off, s33 offset:436
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179, off, s33 offset:440
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180, off, s33 offset:444
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181, off, s33 offset:448
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182, off, s33 offset:452
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183, off, s33 offset:456
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192, off, s33 offset:460
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193, off, s33 offset:464
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194, off, s33 offset:468
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195, off, s33 offset:472
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196, off, s33 offset:476
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197, off, s33 offset:480
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198, off, s33 offset:484
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199, off, s33 offset:488
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208, off, s33 offset:492
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209, off, s33 offset:496
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210, off, s33 offset:500
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211, off, s33 offset:504
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212, off, s33 offset:508
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213, off, s33 offset:512
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87, off, s33 offset:264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96, off, s33 offset:268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97, off, s33 offset:272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98, off, s33 offset:276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99, off, s33 offset:280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100, off, s33 offset:284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101, off, s33 offset:288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102, off, s33 offset:292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103, off, s33 offset:296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112, off, s33 offset:300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113, off, s33 offset:304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114, off, s33 offset:308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115, off, s33 offset:312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116, off, s33 offset:316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117, off, s33 offset:320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118, off, s33 offset:324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119, off, s33 offset:328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128, off, s33 offset:332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129, off, s33 offset:336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130, off, s33 offset:340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131, off, s33 offset:344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132, off, s33 offset:348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133, off, s33 offset:352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134, off, s33 offset:356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135, off, s33 offset:360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144, off, s33 offset:364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145, off, s33 offset:368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146, off, s33 offset:372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147, off, s33 offset:376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148, off, s33 offset:380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149, off, s33 offset:384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150, off, s33 offset:388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151, off, s33 offset:392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160, off, s33 offset:396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161, off, s33 offset:400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162, off, s33 offset:404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163, off, s33 offset:408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164, off, s33 offset:412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165, off, s33 offset:416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166, off, s33 offset:420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167, off, s33 offset:424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176, off, s33 offset:428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177, off, s33 offset:432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178, off, s33 offset:436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179, off, s33 offset:440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180, off, s33 offset:444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181, off, s33 offset:448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182, off, s33 offset:452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183, off, s33 offset:456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192, off, s33 offset:460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193, off, s33 offset:464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194, off, s33 offset:468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195, off, s33 offset:472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196, off, s33 offset:476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197, off, s33 offset:480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198, off, s33 offset:484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199, off, s33 offset:488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208, off, s33 offset:492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209, off, s33 offset:496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210, off, s33 offset:500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211, off, s33 offset:504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212, off, s33 offset:508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213, off, s33 offset:512 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214, off, s33 offset:516
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215, off, s33 offset:520
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224, off, s33 offset:524
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225, off, s33 offset:528
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226, off, s33 offset:532
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227, off, s33 offset:536
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228, off, s33 offset:540
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229, off, s33 offset:544
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230, off, s33 offset:548
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231, off, s33 offset:552
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240, off, s33 offset:556
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241, off, s33 offset:560
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242, off, s33 offset:564
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243, off, s33 offset:568
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244, off, s33 offset:572
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245, off, s33 offset:576
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246, off, s33 offset:580
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247, off, s33 offset:584
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214, off, s33 offset:516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215, off, s33 offset:520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224, off, s33 offset:524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225, off, s33 offset:528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226, off, s33 offset:532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227, off, s33 offset:536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228, off, s33 offset:540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229, off, s33 offset:544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230, off, s33 offset:548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231, off, s33 offset:552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240, off, s33 offset:556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241, off, s33 offset:560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242, off, s33 offset:564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243, off, s33 offset:568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244, off, s33 offset:572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245, off, s33 offset:576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246, off, s33 offset:580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247, off, s33 offset:584 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 64 ; msbs: dst=1 src0=0 src1=0 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v256*/, off, s33 offset:588
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v257*/, off, s33 offset:592
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v258*/, off, s33 offset:596
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v259*/, off, s33 offset:600
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v260*/, off, s33 offset:604
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v261*/, off, s33 offset:608
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v262*/, off, s33 offset:612
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v263*/, off, s33 offset:616
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v264*/, off, s33 offset:620
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v265*/, off, s33 offset:624
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v266*/, off, s33 offset:628
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v267*/, off, s33 offset:632
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v268*/, off, s33 offset:636
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v269*/, off, s33 offset:640
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v270*/, off, s33 offset:644
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v271*/, off, s33 offset:648
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v272*/, off, s33 offset:652
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v273*/, off, s33 offset:656
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v274*/, off, s33 offset:660
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v275*/, off, s33 offset:664
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v276*/, off, s33 offset:668
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v277*/, off, s33 offset:672
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v278*/, off, s33 offset:676
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v279*/, off, s33 offset:680
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v280*/, off, s33 offset:684
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v281*/, off, s33 offset:688
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v282*/, off, s33 offset:692
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v283*/, off, s33 offset:696
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v284*/, off, s33 offset:700
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v285*/, off, s33 offset:704
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v286*/, off, s33 offset:708
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v287*/, off, s33 offset:712
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v288*/, off, s33 offset:716
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v289*/, off, s33 offset:720
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v290*/, off, s33 offset:724
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v291*/, off, s33 offset:728
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v292*/, off, s33 offset:732
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v293*/, off, s33 offset:736
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v294*/, off, s33 offset:740
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v295*/, off, s33 offset:744
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v296*/, off, s33 offset:748
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v297*/, off, s33 offset:752
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v298*/, off, s33 offset:756
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v299*/, off, s33 offset:760
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v300*/, off, s33 offset:764
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v256*/, off, s33 offset:588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v257*/, off, s33 offset:592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v258*/, off, s33 offset:596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v259*/, off, s33 offset:600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v260*/, off, s33 offset:604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v261*/, off, s33 offset:608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v262*/, off, s33 offset:612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v263*/, off, s33 offset:616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v264*/, off, s33 offset:620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v265*/, off, s33 offset:624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v266*/, off, s33 offset:628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v267*/, off, s33 offset:632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v268*/, off, s33 offset:636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v269*/, off, s33 offset:640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v270*/, off, s33 offset:644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v271*/, off, s33 offset:648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v272*/, off, s33 offset:652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v273*/, off, s33 offset:656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v274*/, off, s33 offset:660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v275*/, off, s33 offset:664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v276*/, off, s33 offset:668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v277*/, off, s33 offset:672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v278*/, off, s33 offset:676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v279*/, off, s33 offset:680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v280*/, off, s33 offset:684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v281*/, off, s33 offset:688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v282*/, off, s33 offset:692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v283*/, off, s33 offset:696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v284*/, off, s33 offset:700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v285*/, off, s33 offset:704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v286*/, off, s33 offset:708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v287*/, off, s33 offset:712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v288*/, off, s33 offset:716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v289*/, off, s33 offset:720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v290*/, off, s33 offset:724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v291*/, off, s33 offset:728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v292*/, off, s33 offset:732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v293*/, off, s33 offset:736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v294*/, off, s33 offset:740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v295*/, off, s33 offset:744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v296*/, off, s33 offset:748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v297*/, off, s33 offset:752 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v298*/, off, s33 offset:756 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v299*/, off, s33 offset:760 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v300*/, off, s33 offset:764 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v301*/, off, s33 offset:768
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v302*/, off, s33 offset:772
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v303*/, off, s33 offset:776
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v304*/, off, s33 offset:780
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v305*/, off, s33 offset:784
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v306*/, off, s33 offset:788
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v307*/, off, s33 offset:792
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v308*/, off, s33 offset:796
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v309*/, off, s33 offset:800
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v310*/, off, s33 offset:804
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v311*/, off, s33 offset:808
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v312*/, off, s33 offset:812
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v313*/, off, s33 offset:816
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v314*/, off, s33 offset:820
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v315*/, off, s33 offset:824
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v316*/, off, s33 offset:828
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v317*/, off, s33 offset:832
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v318*/, off, s33 offset:836
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v319*/, off, s33 offset:840
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v320*/, off, s33 offset:844
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v321*/, off, s33 offset:848
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v322*/, off, s33 offset:852
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v323*/, off, s33 offset:856
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v324*/, off, s33 offset:860
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v325*/, off, s33 offset:864
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v326*/, off, s33 offset:868
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v327*/, off, s33 offset:872
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v328*/, off, s33 offset:876
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v329*/, off, s33 offset:880
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v330*/, off, s33 offset:884
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v331*/, off, s33 offset:888
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v332*/, off, s33 offset:892
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v333*/, off, s33 offset:896
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v334*/, off, s33 offset:900
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v335*/, off, s33 offset:904
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v336*/, off, s33 offset:908
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v337*/, off, s33 offset:912
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v338*/, off, s33 offset:916
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v339*/, off, s33 offset:920
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v340*/, off, s33 offset:924
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v341*/, off, s33 offset:928
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v342*/, off, s33 offset:932
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v343*/, off, s33 offset:936
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v344*/, off, s33 offset:940
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v345*/, off, s33 offset:944
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v346*/, off, s33 offset:948
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v347*/, off, s33 offset:952
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v348*/, off, s33 offset:956
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v349*/, off, s33 offset:960
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v350*/, off, s33 offset:964
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v351*/, off, s33 offset:968
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v352*/, off, s33 offset:972
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v353*/, off, s33 offset:976
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v354*/, off, s33 offset:980
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v355*/, off, s33 offset:984
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v356*/, off, s33 offset:988
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v357*/, off, s33 offset:992
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v358*/, off, s33 offset:996
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v359*/, off, s33 offset:1000
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v360*/, off, s33 offset:1004
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v361*/, off, s33 offset:1008
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v362*/, off, s33 offset:1012
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v363*/, off, s33 offset:1016
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v301*/, off, s33 offset:768 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v302*/, off, s33 offset:772 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v303*/, off, s33 offset:776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v304*/, off, s33 offset:780 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v305*/, off, s33 offset:784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v306*/, off, s33 offset:788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v307*/, off, s33 offset:792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v308*/, off, s33 offset:796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v309*/, off, s33 offset:800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v310*/, off, s33 offset:804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v311*/, off, s33 offset:808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v312*/, off, s33 offset:812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v313*/, off, s33 offset:816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v314*/, off, s33 offset:820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v315*/, off, s33 offset:824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v316*/, off, s33 offset:828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v317*/, off, s33 offset:832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v318*/, off, s33 offset:836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v319*/, off, s33 offset:840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v320*/, off, s33 offset:844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v321*/, off, s33 offset:848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v322*/, off, s33 offset:852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v323*/, off, s33 offset:856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v324*/, off, s33 offset:860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v325*/, off, s33 offset:864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v326*/, off, s33 offset:868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v327*/, off, s33 offset:872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v328*/, off, s33 offset:876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v329*/, off, s33 offset:880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v330*/, off, s33 offset:884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v331*/, off, s33 offset:888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v332*/, off, s33 offset:892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v333*/, off, s33 offset:896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v334*/, off, s33 offset:900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v335*/, off, s33 offset:904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v336*/, off, s33 offset:908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v337*/, off, s33 offset:912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v338*/, off, s33 offset:916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v339*/, off, s33 offset:920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v340*/, off, s33 offset:924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v341*/, off, s33 offset:928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v342*/, off, s33 offset:932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v343*/, off, s33 offset:936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v344*/, off, s33 offset:940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v345*/, off, s33 offset:944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v346*/, off, s33 offset:948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v347*/, off, s33 offset:952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v348*/, off, s33 offset:956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v349*/, off, s33 offset:960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v350*/, off, s33 offset:964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v351*/, off, s33 offset:968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v352*/, off, s33 offset:972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v353*/, off, s33 offset:976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v354*/, off, s33 offset:980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v355*/, off, s33 offset:984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v356*/, off, s33 offset:988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v357*/, off, s33 offset:992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v358*/, off, s33 offset:996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v359*/, off, s33 offset:1000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v360*/, off, s33 offset:1004 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v361*/, off, s33 offset:1008 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v362*/, off, s33 offset:1012 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v363*/, off, s33 offset:1016 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v364*/, off, s33 offset:1020
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v365*/, off, s33 offset:1024
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v366*/, off, s33 offset:1028
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v367*/, off, s33 offset:1032
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v368*/, off, s33 offset:1036
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v369*/, off, s33 offset:1040
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v370*/, off, s33 offset:1044
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v371*/, off, s33 offset:1048
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v372*/, off, s33 offset:1052
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v373*/, off, s33 offset:1056
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v374*/, off, s33 offset:1060
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v375*/, off, s33 offset:1064
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v376*/, off, s33 offset:1068
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v377*/, off, s33 offset:1072
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v378*/, off, s33 offset:1076
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v379*/, off, s33 offset:1080
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v380*/, off, s33 offset:1084
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v381*/, off, s33 offset:1088
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v382*/, off, s33 offset:1092
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v383*/, off, s33 offset:1096
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v384*/, off, s33 offset:1100
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v385*/, off, s33 offset:1104
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v386*/, off, s33 offset:1108
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v387*/, off, s33 offset:1112
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v388*/, off, s33 offset:1116
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v389*/, off, s33 offset:1120
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v390*/, off, s33 offset:1124
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v391*/, off, s33 offset:1128
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v392*/, off, s33 offset:1132
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v393*/, off, s33 offset:1136
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v394*/, off, s33 offset:1140
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v395*/, off, s33 offset:1144
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v396*/, off, s33 offset:1148
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v397*/, off, s33 offset:1152
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v398*/, off, s33 offset:1156
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v399*/, off, s33 offset:1160
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v400*/, off, s33 offset:1164
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v401*/, off, s33 offset:1168
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v402*/, off, s33 offset:1172
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v403*/, off, s33 offset:1176
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v404*/, off, s33 offset:1180
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v405*/, off, s33 offset:1184
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v406*/, off, s33 offset:1188
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v407*/, off, s33 offset:1192
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v408*/, off, s33 offset:1196
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v409*/, off, s33 offset:1200
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v410*/, off, s33 offset:1204
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v411*/, off, s33 offset:1208
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v412*/, off, s33 offset:1212
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v413*/, off, s33 offset:1216
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v414*/, off, s33 offset:1220
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v415*/, off, s33 offset:1224
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v416*/, off, s33 offset:1228
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v417*/, off, s33 offset:1232
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v418*/, off, s33 offset:1236
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v419*/, off, s33 offset:1240
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v420*/, off, s33 offset:1244
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v421*/, off, s33 offset:1248
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v422*/, off, s33 offset:1252
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v423*/, off, s33 offset:1256
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v424*/, off, s33 offset:1260
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v425*/, off, s33 offset:1264
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v426*/, off, s33 offset:1268
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v364*/, off, s33 offset:1020 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v365*/, off, s33 offset:1024 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v366*/, off, s33 offset:1028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v367*/, off, s33 offset:1032 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v368*/, off, s33 offset:1036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v369*/, off, s33 offset:1040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v370*/, off, s33 offset:1044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v371*/, off, s33 offset:1048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v372*/, off, s33 offset:1052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v373*/, off, s33 offset:1056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v374*/, off, s33 offset:1060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v375*/, off, s33 offset:1064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v376*/, off, s33 offset:1068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v377*/, off, s33 offset:1072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v378*/, off, s33 offset:1076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v379*/, off, s33 offset:1080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v380*/, off, s33 offset:1084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v381*/, off, s33 offset:1088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v382*/, off, s33 offset:1092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v383*/, off, s33 offset:1096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v384*/, off, s33 offset:1100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v385*/, off, s33 offset:1104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v386*/, off, s33 offset:1108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v387*/, off, s33 offset:1112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v388*/, off, s33 offset:1116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v389*/, off, s33 offset:1120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v390*/, off, s33 offset:1124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v391*/, off, s33 offset:1128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v392*/, off, s33 offset:1132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v393*/, off, s33 offset:1136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v394*/, off, s33 offset:1140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v395*/, off, s33 offset:1144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v396*/, off, s33 offset:1148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v397*/, off, s33 offset:1152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v398*/, off, s33 offset:1156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v399*/, off, s33 offset:1160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v400*/, off, s33 offset:1164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v401*/, off, s33 offset:1168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v402*/, off, s33 offset:1172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v403*/, off, s33 offset:1176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v404*/, off, s33 offset:1180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v405*/, off, s33 offset:1184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v406*/, off, s33 offset:1188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v407*/, off, s33 offset:1192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v408*/, off, s33 offset:1196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v409*/, off, s33 offset:1200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v410*/, off, s33 offset:1204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v411*/, off, s33 offset:1208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v412*/, off, s33 offset:1212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v413*/, off, s33 offset:1216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v414*/, off, s33 offset:1220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v415*/, off, s33 offset:1224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v416*/, off, s33 offset:1228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v417*/, off, s33 offset:1232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v418*/, off, s33 offset:1236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v419*/, off, s33 offset:1240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v420*/, off, s33 offset:1244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v421*/, off, s33 offset:1248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v422*/, off, s33 offset:1252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v423*/, off, s33 offset:1256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v424*/, off, s33 offset:1260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v425*/, off, s33 offset:1264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v426*/, off, s33 offset:1268 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v427*/, off, s33 offset:1272
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v428*/, off, s33 offset:1276
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v429*/, off, s33 offset:1280
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v430*/, off, s33 offset:1284
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v431*/, off, s33 offset:1288
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v432*/, off, s33 offset:1292
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v433*/, off, s33 offset:1296
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v434*/, off, s33 offset:1300
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v435*/, off, s33 offset:1304
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v436*/, off, s33 offset:1308
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v437*/, off, s33 offset:1312
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v438*/, off, s33 offset:1316
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v439*/, off, s33 offset:1320
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v440*/, off, s33 offset:1324
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v441*/, off, s33 offset:1328
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v442*/, off, s33 offset:1332
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v443*/, off, s33 offset:1336
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v444*/, off, s33 offset:1340
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v445*/, off, s33 offset:1344
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v446*/, off, s33 offset:1348
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v447*/, off, s33 offset:1352
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v448*/, off, s33 offset:1356
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v449*/, off, s33 offset:1360
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v450*/, off, s33 offset:1364
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v451*/, off, s33 offset:1368
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v452*/, off, s33 offset:1372
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v453*/, off, s33 offset:1376
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v454*/, off, s33 offset:1380
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v455*/, off, s33 offset:1384
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v456*/, off, s33 offset:1388
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v457*/, off, s33 offset:1392
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v458*/, off, s33 offset:1396
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v459*/, off, s33 offset:1400
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v460*/, off, s33 offset:1404
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v461*/, off, s33 offset:1408
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v462*/, off, s33 offset:1412
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v463*/, off, s33 offset:1416
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v464*/, off, s33 offset:1420
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v465*/, off, s33 offset:1424
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v466*/, off, s33 offset:1428
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v467*/, off, s33 offset:1432
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v468*/, off, s33 offset:1436
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v469*/, off, s33 offset:1440
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v470*/, off, s33 offset:1444
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v471*/, off, s33 offset:1448
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v472*/, off, s33 offset:1452
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v473*/, off, s33 offset:1456
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v474*/, off, s33 offset:1460
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v475*/, off, s33 offset:1464
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v476*/, off, s33 offset:1468
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v477*/, off, s33 offset:1472
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v478*/, off, s33 offset:1476
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v479*/, off, s33 offset:1480
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v480*/, off, s33 offset:1484
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v481*/, off, s33 offset:1488
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v482*/, off, s33 offset:1492
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v483*/, off, s33 offset:1496
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v484*/, off, s33 offset:1500
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v485*/, off, s33 offset:1504
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v486*/, off, s33 offset:1508
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v487*/, off, s33 offset:1512
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v488*/, off, s33 offset:1516
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v489*/, off, s33 offset:1520
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v427*/, off, s33 offset:1272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v428*/, off, s33 offset:1276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v429*/, off, s33 offset:1280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v430*/, off, s33 offset:1284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v431*/, off, s33 offset:1288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v432*/, off, s33 offset:1292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v433*/, off, s33 offset:1296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v434*/, off, s33 offset:1300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v435*/, off, s33 offset:1304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v436*/, off, s33 offset:1308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v437*/, off, s33 offset:1312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v438*/, off, s33 offset:1316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v439*/, off, s33 offset:1320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v440*/, off, s33 offset:1324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v441*/, off, s33 offset:1328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v442*/, off, s33 offset:1332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v443*/, off, s33 offset:1336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v444*/, off, s33 offset:1340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v445*/, off, s33 offset:1344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v446*/, off, s33 offset:1348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v447*/, off, s33 offset:1352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v448*/, off, s33 offset:1356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v449*/, off, s33 offset:1360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v450*/, off, s33 offset:1364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v451*/, off, s33 offset:1368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v452*/, off, s33 offset:1372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v453*/, off, s33 offset:1376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v454*/, off, s33 offset:1380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v455*/, off, s33 offset:1384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v456*/, off, s33 offset:1388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v457*/, off, s33 offset:1392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v458*/, off, s33 offset:1396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v459*/, off, s33 offset:1400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v460*/, off, s33 offset:1404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v461*/, off, s33 offset:1408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v462*/, off, s33 offset:1412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v463*/, off, s33 offset:1416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v464*/, off, s33 offset:1420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v465*/, off, s33 offset:1424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v466*/, off, s33 offset:1428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v467*/, off, s33 offset:1432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v468*/, off, s33 offset:1436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v469*/, off, s33 offset:1440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v470*/, off, s33 offset:1444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v471*/, off, s33 offset:1448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v472*/, off, s33 offset:1452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v473*/, off, s33 offset:1456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v474*/, off, s33 offset:1460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v475*/, off, s33 offset:1464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v476*/, off, s33 offset:1468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v477*/, off, s33 offset:1472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v478*/, off, s33 offset:1476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v479*/, off, s33 offset:1480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v480*/, off, s33 offset:1484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v481*/, off, s33 offset:1488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v482*/, off, s33 offset:1492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v483*/, off, s33 offset:1496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v484*/, off, s33 offset:1500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v485*/, off, s33 offset:1504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v486*/, off, s33 offset:1508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v487*/, off, s33 offset:1512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v488*/, off, s33 offset:1516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v489*/, off, s33 offset:1520 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v490*/, off, s33 offset:1524
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v491*/, off, s33 offset:1528
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v492*/, off, s33 offset:1532
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v493*/, off, s33 offset:1536
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v494*/, off, s33 offset:1540
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v495*/, off, s33 offset:1544
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v496*/, off, s33 offset:1548
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v497*/, off, s33 offset:1552
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v498*/, off, s33 offset:1556
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v499*/, off, s33 offset:1560
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v500*/, off, s33 offset:1564
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v501*/, off, s33 offset:1568
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v502*/, off, s33 offset:1572
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v503*/, off, s33 offset:1576
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v504*/, off, s33 offset:1580
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v505*/, off, s33 offset:1584
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v506*/, off, s33 offset:1588
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v507*/, off, s33 offset:1592
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v508*/, off, s33 offset:1596
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v509*/, off, s33 offset:1600
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v510*/, off, s33 offset:1604
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v511*/, off, s33 offset:1608
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v490*/, off, s33 offset:1524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v491*/, off, s33 offset:1528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v492*/, off, s33 offset:1532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v493*/, off, s33 offset:1536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v494*/, off, s33 offset:1540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v495*/, off, s33 offset:1544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v496*/, off, s33 offset:1548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v497*/, off, s33 offset:1552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v498*/, off, s33 offset:1556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v499*/, off, s33 offset:1560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v500*/, off, s33 offset:1564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v501*/, off, s33 offset:1568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v502*/, off, s33 offset:1572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v503*/, off, s33 offset:1576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v504*/, off, s33 offset:1580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v505*/, off, s33 offset:1584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v506*/, off, s33 offset:1588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v507*/, off, s33 offset:1592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v508*/, off, s33 offset:1596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v509*/, off, s33 offset:1600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v510*/, off, s33 offset:1604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v511*/, off, s33 offset:1608 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x4080 ; msbs: dst=2 src0=0 src1=0 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v512*/, off, s33 offset:1612
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v513*/, off, s33 offset:1616
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v514*/, off, s33 offset:1620
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v515*/, off, s33 offset:1624
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v516*/, off, s33 offset:1628
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v517*/, off, s33 offset:1632
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v518*/, off, s33 offset:1636
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v519*/, off, s33 offset:1640
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v520*/, off, s33 offset:1644
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v521*/, off, s33 offset:1648
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v522*/, off, s33 offset:1652
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v523*/, off, s33 offset:1656
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v524*/, off, s33 offset:1660
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v525*/, off, s33 offset:1664
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v526*/, off, s33 offset:1668
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v527*/, off, s33 offset:1672
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v528*/, off, s33 offset:1676
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v529*/, off, s33 offset:1680
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v530*/, off, s33 offset:1684
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v531*/, off, s33 offset:1688
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v532*/, off, s33 offset:1692
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v533*/, off, s33 offset:1696
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v534*/, off, s33 offset:1700
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v535*/, off, s33 offset:1704
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v536*/, off, s33 offset:1708
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v537*/, off, s33 offset:1712
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v538*/, off, s33 offset:1716
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v539*/, off, s33 offset:1720
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v540*/, off, s33 offset:1724
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v541*/, off, s33 offset:1728
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v542*/, off, s33 offset:1732
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v543*/, off, s33 offset:1736
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v544*/, off, s33 offset:1740
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v545*/, off, s33 offset:1744
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v546*/, off, s33 offset:1748
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v547*/, off, s33 offset:1752
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v548*/, off, s33 offset:1756
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v549*/, off, s33 offset:1760
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v550*/, off, s33 offset:1764
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v551*/, off, s33 offset:1768
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v552*/, off, s33 offset:1772
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v512*/, off, s33 offset:1612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v513*/, off, s33 offset:1616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v514*/, off, s33 offset:1620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v515*/, off, s33 offset:1624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v516*/, off, s33 offset:1628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v517*/, off, s33 offset:1632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v518*/, off, s33 offset:1636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v519*/, off, s33 offset:1640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v520*/, off, s33 offset:1644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v521*/, off, s33 offset:1648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v522*/, off, s33 offset:1652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v523*/, off, s33 offset:1656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v524*/, off, s33 offset:1660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v525*/, off, s33 offset:1664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v526*/, off, s33 offset:1668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v527*/, off, s33 offset:1672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v528*/, off, s33 offset:1676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v529*/, off, s33 offset:1680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v530*/, off, s33 offset:1684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v531*/, off, s33 offset:1688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v532*/, off, s33 offset:1692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v533*/, off, s33 offset:1696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v534*/, off, s33 offset:1700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v535*/, off, s33 offset:1704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v536*/, off, s33 offset:1708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v537*/, off, s33 offset:1712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v538*/, off, s33 offset:1716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v539*/, off, s33 offset:1720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v540*/, off, s33 offset:1724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v541*/, off, s33 offset:1728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v542*/, off, s33 offset:1732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v543*/, off, s33 offset:1736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v544*/, off, s33 offset:1740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v545*/, off, s33 offset:1744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v546*/, off, s33 offset:1748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v547*/, off, s33 offset:1752 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v548*/, off, s33 offset:1756 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v549*/, off, s33 offset:1760 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v550*/, off, s33 offset:1764 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v551*/, off, s33 offset:1768 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v552*/, off, s33 offset:1772 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v553*/, off, s33 offset:1776
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v554*/, off, s33 offset:1780
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v555*/, off, s33 offset:1784
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v556*/, off, s33 offset:1788
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v557*/, off, s33 offset:1792
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v558*/, off, s33 offset:1796
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v559*/, off, s33 offset:1800
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v560*/, off, s33 offset:1804
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v561*/, off, s33 offset:1808
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v562*/, off, s33 offset:1812
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v563*/, off, s33 offset:1816
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v564*/, off, s33 offset:1820
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v565*/, off, s33 offset:1824
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v566*/, off, s33 offset:1828
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v567*/, off, s33 offset:1832
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v568*/, off, s33 offset:1836
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v569*/, off, s33 offset:1840
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v570*/, off, s33 offset:1844
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v571*/, off, s33 offset:1848
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v572*/, off, s33 offset:1852
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v573*/, off, s33 offset:1856
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v574*/, off, s33 offset:1860
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v575*/, off, s33 offset:1864
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v576*/, off, s33 offset:1868
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v577*/, off, s33 offset:1872
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v578*/, off, s33 offset:1876
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v579*/, off, s33 offset:1880
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v580*/, off, s33 offset:1884
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v581*/, off, s33 offset:1888
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v582*/, off, s33 offset:1892
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v583*/, off, s33 offset:1896
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v584*/, off, s33 offset:1900
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v585*/, off, s33 offset:1904
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v586*/, off, s33 offset:1908
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v587*/, off, s33 offset:1912
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v588*/, off, s33 offset:1916
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v589*/, off, s33 offset:1920
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v590*/, off, s33 offset:1924
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v591*/, off, s33 offset:1928
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v592*/, off, s33 offset:1932
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v593*/, off, s33 offset:1936
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v594*/, off, s33 offset:1940
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v595*/, off, s33 offset:1944
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v596*/, off, s33 offset:1948
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v597*/, off, s33 offset:1952
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v598*/, off, s33 offset:1956
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v599*/, off, s33 offset:1960
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v600*/, off, s33 offset:1964
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v601*/, off, s33 offset:1968
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v602*/, off, s33 offset:1972
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v603*/, off, s33 offset:1976
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v604*/, off, s33 offset:1980
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v605*/, off, s33 offset:1984
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v606*/, off, s33 offset:1988
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v607*/, off, s33 offset:1992
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v608*/, off, s33 offset:1996
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v609*/, off, s33 offset:2000
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v610*/, off, s33 offset:2004
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v611*/, off, s33 offset:2008
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v612*/, off, s33 offset:2012
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v613*/, off, s33 offset:2016
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v614*/, off, s33 offset:2020
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v615*/, off, s33 offset:2024
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v553*/, off, s33 offset:1776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v554*/, off, s33 offset:1780 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v555*/, off, s33 offset:1784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v556*/, off, s33 offset:1788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v557*/, off, s33 offset:1792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v558*/, off, s33 offset:1796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v559*/, off, s33 offset:1800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v560*/, off, s33 offset:1804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v561*/, off, s33 offset:1808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v562*/, off, s33 offset:1812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v563*/, off, s33 offset:1816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v564*/, off, s33 offset:1820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v565*/, off, s33 offset:1824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v566*/, off, s33 offset:1828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v567*/, off, s33 offset:1832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v568*/, off, s33 offset:1836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v569*/, off, s33 offset:1840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v570*/, off, s33 offset:1844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v571*/, off, s33 offset:1848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v572*/, off, s33 offset:1852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v573*/, off, s33 offset:1856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v574*/, off, s33 offset:1860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v575*/, off, s33 offset:1864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v576*/, off, s33 offset:1868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v577*/, off, s33 offset:1872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v578*/, off, s33 offset:1876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v579*/, off, s33 offset:1880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v580*/, off, s33 offset:1884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v581*/, off, s33 offset:1888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v582*/, off, s33 offset:1892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v583*/, off, s33 offset:1896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v584*/, off, s33 offset:1900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v585*/, off, s33 offset:1904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v586*/, off, s33 offset:1908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v587*/, off, s33 offset:1912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v588*/, off, s33 offset:1916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v589*/, off, s33 offset:1920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v590*/, off, s33 offset:1924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v591*/, off, s33 offset:1928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v592*/, off, s33 offset:1932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v593*/, off, s33 offset:1936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v594*/, off, s33 offset:1940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v595*/, off, s33 offset:1944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v596*/, off, s33 offset:1948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v597*/, off, s33 offset:1952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v598*/, off, s33 offset:1956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v599*/, off, s33 offset:1960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v600*/, off, s33 offset:1964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v601*/, off, s33 offset:1968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v602*/, off, s33 offset:1972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v603*/, off, s33 offset:1976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v604*/, off, s33 offset:1980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v605*/, off, s33 offset:1984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v606*/, off, s33 offset:1988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v607*/, off, s33 offset:1992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v608*/, off, s33 offset:1996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v609*/, off, s33 offset:2000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v610*/, off, s33 offset:2004 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v611*/, off, s33 offset:2008 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v612*/, off, s33 offset:2012 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v613*/, off, s33 offset:2016 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v614*/, off, s33 offset:2020 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v615*/, off, s33 offset:2024 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v616*/, off, s33 offset:2028
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v617*/, off, s33 offset:2032
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v618*/, off, s33 offset:2036
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v619*/, off, s33 offset:2040
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v620*/, off, s33 offset:2044
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v621*/, off, s33 offset:2048
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v622*/, off, s33 offset:2052
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v623*/, off, s33 offset:2056
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v624*/, off, s33 offset:2060
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v625*/, off, s33 offset:2064
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v626*/, off, s33 offset:2068
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v627*/, off, s33 offset:2072
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v628*/, off, s33 offset:2076
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v629*/, off, s33 offset:2080
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v630*/, off, s33 offset:2084
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v631*/, off, s33 offset:2088
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v632*/, off, s33 offset:2092
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v633*/, off, s33 offset:2096
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v634*/, off, s33 offset:2100
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v635*/, off, s33 offset:2104
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v636*/, off, s33 offset:2108
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v637*/, off, s33 offset:2112
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v638*/, off, s33 offset:2116
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v639*/, off, s33 offset:2120
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v640*/, off, s33 offset:2124
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v641*/, off, s33 offset:2128
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v642*/, off, s33 offset:2132
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v643*/, off, s33 offset:2136
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v644*/, off, s33 offset:2140
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v645*/, off, s33 offset:2144
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v646*/, off, s33 offset:2148
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v647*/, off, s33 offset:2152
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v648*/, off, s33 offset:2156
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v649*/, off, s33 offset:2160
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v650*/, off, s33 offset:2164
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v651*/, off, s33 offset:2168
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v652*/, off, s33 offset:2172
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v653*/, off, s33 offset:2176
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v654*/, off, s33 offset:2180
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v655*/, off, s33 offset:2184
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v656*/, off, s33 offset:2188
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v657*/, off, s33 offset:2192
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v658*/, off, s33 offset:2196
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v659*/, off, s33 offset:2200
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v660*/, off, s33 offset:2204
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v661*/, off, s33 offset:2208
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v662*/, off, s33 offset:2212
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v663*/, off, s33 offset:2216
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v664*/, off, s33 offset:2220
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v665*/, off, s33 offset:2224
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v666*/, off, s33 offset:2228
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v667*/, off, s33 offset:2232
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v668*/, off, s33 offset:2236
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v669*/, off, s33 offset:2240
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v670*/, off, s33 offset:2244
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v671*/, off, s33 offset:2248
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v672*/, off, s33 offset:2252
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v673*/, off, s33 offset:2256
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v674*/, off, s33 offset:2260
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v675*/, off, s33 offset:2264
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v676*/, off, s33 offset:2268
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v677*/, off, s33 offset:2272
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v678*/, off, s33 offset:2276
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v616*/, off, s33 offset:2028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v617*/, off, s33 offset:2032 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v618*/, off, s33 offset:2036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v619*/, off, s33 offset:2040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v620*/, off, s33 offset:2044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v621*/, off, s33 offset:2048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v622*/, off, s33 offset:2052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v623*/, off, s33 offset:2056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v624*/, off, s33 offset:2060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v625*/, off, s33 offset:2064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v626*/, off, s33 offset:2068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v627*/, off, s33 offset:2072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v628*/, off, s33 offset:2076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v629*/, off, s33 offset:2080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v630*/, off, s33 offset:2084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v631*/, off, s33 offset:2088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v632*/, off, s33 offset:2092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v633*/, off, s33 offset:2096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v634*/, off, s33 offset:2100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v635*/, off, s33 offset:2104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v636*/, off, s33 offset:2108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v637*/, off, s33 offset:2112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v638*/, off, s33 offset:2116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v639*/, off, s33 offset:2120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v640*/, off, s33 offset:2124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v641*/, off, s33 offset:2128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v642*/, off, s33 offset:2132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v643*/, off, s33 offset:2136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v644*/, off, s33 offset:2140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v645*/, off, s33 offset:2144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v646*/, off, s33 offset:2148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v647*/, off, s33 offset:2152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v648*/, off, s33 offset:2156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v649*/, off, s33 offset:2160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v650*/, off, s33 offset:2164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v651*/, off, s33 offset:2168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v652*/, off, s33 offset:2172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v653*/, off, s33 offset:2176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v654*/, off, s33 offset:2180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v655*/, off, s33 offset:2184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v656*/, off, s33 offset:2188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v657*/, off, s33 offset:2192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v658*/, off, s33 offset:2196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v659*/, off, s33 offset:2200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v660*/, off, s33 offset:2204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v661*/, off, s33 offset:2208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v662*/, off, s33 offset:2212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v663*/, off, s33 offset:2216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v664*/, off, s33 offset:2220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v665*/, off, s33 offset:2224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v666*/, off, s33 offset:2228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v667*/, off, s33 offset:2232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v668*/, off, s33 offset:2236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v669*/, off, s33 offset:2240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v670*/, off, s33 offset:2244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v671*/, off, s33 offset:2248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v672*/, off, s33 offset:2252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v673*/, off, s33 offset:2256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v674*/, off, s33 offset:2260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v675*/, off, s33 offset:2264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v676*/, off, s33 offset:2268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v677*/, off, s33 offset:2272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v678*/, off, s33 offset:2276 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v679*/, off, s33 offset:2280
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v680*/, off, s33 offset:2284
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v681*/, off, s33 offset:2288
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v682*/, off, s33 offset:2292
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v683*/, off, s33 offset:2296
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v684*/, off, s33 offset:2300
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v685*/, off, s33 offset:2304
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v686*/, off, s33 offset:2308
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v687*/, off, s33 offset:2312
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v688*/, off, s33 offset:2316
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v689*/, off, s33 offset:2320
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v690*/, off, s33 offset:2324
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v691*/, off, s33 offset:2328
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v692*/, off, s33 offset:2332
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v693*/, off, s33 offset:2336
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v694*/, off, s33 offset:2340
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v695*/, off, s33 offset:2344
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v696*/, off, s33 offset:2348
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v697*/, off, s33 offset:2352
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v698*/, off, s33 offset:2356
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v699*/, off, s33 offset:2360
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v700*/, off, s33 offset:2364
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v701*/, off, s33 offset:2368
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v702*/, off, s33 offset:2372
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v703*/, off, s33 offset:2376
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v704*/, off, s33 offset:2380
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v705*/, off, s33 offset:2384
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v706*/, off, s33 offset:2388
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v707*/, off, s33 offset:2392
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v708*/, off, s33 offset:2396
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v709*/, off, s33 offset:2400
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v710*/, off, s33 offset:2404
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v711*/, off, s33 offset:2408
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v712*/, off, s33 offset:2412
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v713*/, off, s33 offset:2416
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v714*/, off, s33 offset:2420
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v715*/, off, s33 offset:2424
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v716*/, off, s33 offset:2428
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v717*/, off, s33 offset:2432
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v718*/, off, s33 offset:2436
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v719*/, off, s33 offset:2440
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v720*/, off, s33 offset:2444
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v721*/, off, s33 offset:2448
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v722*/, off, s33 offset:2452
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v723*/, off, s33 offset:2456
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v724*/, off, s33 offset:2460
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v725*/, off, s33 offset:2464
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v726*/, off, s33 offset:2468
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v727*/, off, s33 offset:2472
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v728*/, off, s33 offset:2476
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v729*/, off, s33 offset:2480
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v730*/, off, s33 offset:2484
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v731*/, off, s33 offset:2488
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v732*/, off, s33 offset:2492
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v733*/, off, s33 offset:2496
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v734*/, off, s33 offset:2500
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v735*/, off, s33 offset:2504
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v736*/, off, s33 offset:2508
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v737*/, off, s33 offset:2512
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v738*/, off, s33 offset:2516
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v739*/, off, s33 offset:2520
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v740*/, off, s33 offset:2524
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v741*/, off, s33 offset:2528
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v679*/, off, s33 offset:2280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v680*/, off, s33 offset:2284 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v681*/, off, s33 offset:2288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v682*/, off, s33 offset:2292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v683*/, off, s33 offset:2296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v684*/, off, s33 offset:2300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v685*/, off, s33 offset:2304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v686*/, off, s33 offset:2308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v687*/, off, s33 offset:2312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v688*/, off, s33 offset:2316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v689*/, off, s33 offset:2320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v690*/, off, s33 offset:2324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v691*/, off, s33 offset:2328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v692*/, off, s33 offset:2332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v693*/, off, s33 offset:2336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v694*/, off, s33 offset:2340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v695*/, off, s33 offset:2344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v696*/, off, s33 offset:2348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v697*/, off, s33 offset:2352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v698*/, off, s33 offset:2356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v699*/, off, s33 offset:2360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v700*/, off, s33 offset:2364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v701*/, off, s33 offset:2368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v702*/, off, s33 offset:2372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v703*/, off, s33 offset:2376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v704*/, off, s33 offset:2380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v705*/, off, s33 offset:2384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v706*/, off, s33 offset:2388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v707*/, off, s33 offset:2392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v708*/, off, s33 offset:2396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v709*/, off, s33 offset:2400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v710*/, off, s33 offset:2404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v711*/, off, s33 offset:2408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v712*/, off, s33 offset:2412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v713*/, off, s33 offset:2416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v714*/, off, s33 offset:2420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v715*/, off, s33 offset:2424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v716*/, off, s33 offset:2428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v717*/, off, s33 offset:2432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v718*/, off, s33 offset:2436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v719*/, off, s33 offset:2440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v720*/, off, s33 offset:2444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v721*/, off, s33 offset:2448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v722*/, off, s33 offset:2452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v723*/, off, s33 offset:2456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v724*/, off, s33 offset:2460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v725*/, off, s33 offset:2464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v726*/, off, s33 offset:2468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v727*/, off, s33 offset:2472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v728*/, off, s33 offset:2476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v729*/, off, s33 offset:2480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v730*/, off, s33 offset:2484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v731*/, off, s33 offset:2488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v732*/, off, s33 offset:2492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v733*/, off, s33 offset:2496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v734*/, off, s33 offset:2500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v735*/, off, s33 offset:2504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v736*/, off, s33 offset:2508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v737*/, off, s33 offset:2512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v738*/, off, s33 offset:2516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v739*/, off, s33 offset:2520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v740*/, off, s33 offset:2524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v741*/, off, s33 offset:2528 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v742*/, off, s33 offset:2532
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v743*/, off, s33 offset:2536
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v744*/, off, s33 offset:2540
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v745*/, off, s33 offset:2544
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v746*/, off, s33 offset:2548
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v747*/, off, s33 offset:2552
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v748*/, off, s33 offset:2556
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v749*/, off, s33 offset:2560
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v750*/, off, s33 offset:2564
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v751*/, off, s33 offset:2568
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v752*/, off, s33 offset:2572
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v753*/, off, s33 offset:2576
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v754*/, off, s33 offset:2580
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v755*/, off, s33 offset:2584
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v756*/, off, s33 offset:2588
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v757*/, off, s33 offset:2592
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v758*/, off, s33 offset:2596
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v759*/, off, s33 offset:2600
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v760*/, off, s33 offset:2604
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v761*/, off, s33 offset:2608
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v762*/, off, s33 offset:2612
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v763*/, off, s33 offset:2616
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v764*/, off, s33 offset:2620
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v765*/, off, s33 offset:2624
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v766*/, off, s33 offset:2628
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v767*/, off, s33 offset:2632
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v742*/, off, s33 offset:2532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v743*/, off, s33 offset:2536 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v744*/, off, s33 offset:2540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v745*/, off, s33 offset:2544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v746*/, off, s33 offset:2548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v747*/, off, s33 offset:2552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v748*/, off, s33 offset:2556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v749*/, off, s33 offset:2560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v750*/, off, s33 offset:2564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v751*/, off, s33 offset:2568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v752*/, off, s33 offset:2572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v753*/, off, s33 offset:2576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v754*/, off, s33 offset:2580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v755*/, off, s33 offset:2584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v756*/, off, s33 offset:2588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v757*/, off, s33 offset:2592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v758*/, off, s33 offset:2596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v759*/, off, s33 offset:2600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v760*/, off, s33 offset:2604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v761*/, off, s33 offset:2608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v762*/, off, s33 offset:2612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v763*/, off, s33 offset:2616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v764*/, off, s33 offset:2620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v765*/, off, s33 offset:2624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v766*/, off, s33 offset:2628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v767*/, off, s33 offset:2632 nv
 ; GFX1250-DAGISEL-NEXT:    s_set_vgpr_msb 0x80c0 ; msbs: dst=3 src0=0 src1=0 src2=0
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v768*/, off, s33 offset:2636
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v769*/, off, s33 offset:2640
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v770*/, off, s33 offset:2644
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v771*/, off, s33 offset:2648
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v772*/, off, s33 offset:2652
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v773*/, off, s33 offset:2656
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v774*/, off, s33 offset:2660
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v775*/, off, s33 offset:2664
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v776*/, off, s33 offset:2668
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v777*/, off, s33 offset:2672
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v778*/, off, s33 offset:2676
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v779*/, off, s33 offset:2680
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v780*/, off, s33 offset:2684
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v781*/, off, s33 offset:2688
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v782*/, off, s33 offset:2692
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v783*/, off, s33 offset:2696
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v784*/, off, s33 offset:2700
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v785*/, off, s33 offset:2704
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v786*/, off, s33 offset:2708
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v787*/, off, s33 offset:2712
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v788*/, off, s33 offset:2716
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v789*/, off, s33 offset:2720
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v790*/, off, s33 offset:2724
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v791*/, off, s33 offset:2728
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v792*/, off, s33 offset:2732
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v793*/, off, s33 offset:2736
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v794*/, off, s33 offset:2740
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v795*/, off, s33 offset:2744
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v796*/, off, s33 offset:2748
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v797*/, off, s33 offset:2752
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v798*/, off, s33 offset:2756
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v799*/, off, s33 offset:2760
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v800*/, off, s33 offset:2764
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v801*/, off, s33 offset:2768
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v802*/, off, s33 offset:2772
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v803*/, off, s33 offset:2776
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v804*/, off, s33 offset:2780
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v0 /*v768*/, off, s33 offset:2636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v1 /*v769*/, off, s33 offset:2640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v2 /*v770*/, off, s33 offset:2644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v3 /*v771*/, off, s33 offset:2648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v4 /*v772*/, off, s33 offset:2652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v5 /*v773*/, off, s33 offset:2656 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v6 /*v774*/, off, s33 offset:2660 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v7 /*v775*/, off, s33 offset:2664 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v8 /*v776*/, off, s33 offset:2668 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v9 /*v777*/, off, s33 offset:2672 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v10 /*v778*/, off, s33 offset:2676 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v11 /*v779*/, off, s33 offset:2680 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v12 /*v780*/, off, s33 offset:2684 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v13 /*v781*/, off, s33 offset:2688 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v14 /*v782*/, off, s33 offset:2692 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v15 /*v783*/, off, s33 offset:2696 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v16 /*v784*/, off, s33 offset:2700 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v17 /*v785*/, off, s33 offset:2704 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v18 /*v786*/, off, s33 offset:2708 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v19 /*v787*/, off, s33 offset:2712 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v20 /*v788*/, off, s33 offset:2716 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v21 /*v789*/, off, s33 offset:2720 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v22 /*v790*/, off, s33 offset:2724 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v23 /*v791*/, off, s33 offset:2728 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v24 /*v792*/, off, s33 offset:2732 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v25 /*v793*/, off, s33 offset:2736 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v26 /*v794*/, off, s33 offset:2740 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v27 /*v795*/, off, s33 offset:2744 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v28 /*v796*/, off, s33 offset:2748 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v29 /*v797*/, off, s33 offset:2752 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v30 /*v798*/, off, s33 offset:2756 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v31 /*v799*/, off, s33 offset:2760 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v32 /*v800*/, off, s33 offset:2764 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v33 /*v801*/, off, s33 offset:2768 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v34 /*v802*/, off, s33 offset:2772 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v35 /*v803*/, off, s33 offset:2776 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v36 /*v804*/, off, s33 offset:2780 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v805*/, off, s33 offset:2784
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v806*/, off, s33 offset:2788
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v807*/, off, s33 offset:2792
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v808*/, off, s33 offset:2796
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v809*/, off, s33 offset:2800
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v810*/, off, s33 offset:2804
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v811*/, off, s33 offset:2808
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v812*/, off, s33 offset:2812
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v813*/, off, s33 offset:2816
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v814*/, off, s33 offset:2820
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v815*/, off, s33 offset:2824
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v816*/, off, s33 offset:2828
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v817*/, off, s33 offset:2832
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v818*/, off, s33 offset:2836
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v819*/, off, s33 offset:2840
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v820*/, off, s33 offset:2844
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v821*/, off, s33 offset:2848
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v822*/, off, s33 offset:2852
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v823*/, off, s33 offset:2856
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v824*/, off, s33 offset:2860
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v825*/, off, s33 offset:2864
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v826*/, off, s33 offset:2868
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v827*/, off, s33 offset:2872
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v828*/, off, s33 offset:2876
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v829*/, off, s33 offset:2880
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v830*/, off, s33 offset:2884
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v831*/, off, s33 offset:2888
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v832*/, off, s33 offset:2892
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v833*/, off, s33 offset:2896
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v834*/, off, s33 offset:2900
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v835*/, off, s33 offset:2904
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v836*/, off, s33 offset:2908
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v837*/, off, s33 offset:2912
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v838*/, off, s33 offset:2916
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v839*/, off, s33 offset:2920
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v840*/, off, s33 offset:2924
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v841*/, off, s33 offset:2928
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v842*/, off, s33 offset:2932
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v843*/, off, s33 offset:2936
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v844*/, off, s33 offset:2940
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v845*/, off, s33 offset:2944
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v846*/, off, s33 offset:2948
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v847*/, off, s33 offset:2952
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v848*/, off, s33 offset:2956
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v849*/, off, s33 offset:2960
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v850*/, off, s33 offset:2964
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v851*/, off, s33 offset:2968
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v852*/, off, s33 offset:2972
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v853*/, off, s33 offset:2976
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v854*/, off, s33 offset:2980
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v855*/, off, s33 offset:2984
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v856*/, off, s33 offset:2988
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v857*/, off, s33 offset:2992
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v858*/, off, s33 offset:2996
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v859*/, off, s33 offset:3000
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v860*/, off, s33 offset:3004
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v861*/, off, s33 offset:3008
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v862*/, off, s33 offset:3012
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v863*/, off, s33 offset:3016
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v864*/, off, s33 offset:3020
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v865*/, off, s33 offset:3024
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v866*/, off, s33 offset:3028
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v867*/, off, s33 offset:3032
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v37 /*v805*/, off, s33 offset:2784 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v38 /*v806*/, off, s33 offset:2788 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v39 /*v807*/, off, s33 offset:2792 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v40 /*v808*/, off, s33 offset:2796 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v41 /*v809*/, off, s33 offset:2800 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v42 /*v810*/, off, s33 offset:2804 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v43 /*v811*/, off, s33 offset:2808 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v44 /*v812*/, off, s33 offset:2812 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v45 /*v813*/, off, s33 offset:2816 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v46 /*v814*/, off, s33 offset:2820 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v47 /*v815*/, off, s33 offset:2824 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v48 /*v816*/, off, s33 offset:2828 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v49 /*v817*/, off, s33 offset:2832 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v50 /*v818*/, off, s33 offset:2836 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v51 /*v819*/, off, s33 offset:2840 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v52 /*v820*/, off, s33 offset:2844 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v53 /*v821*/, off, s33 offset:2848 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v54 /*v822*/, off, s33 offset:2852 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v55 /*v823*/, off, s33 offset:2856 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v56 /*v824*/, off, s33 offset:2860 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v57 /*v825*/, off, s33 offset:2864 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v58 /*v826*/, off, s33 offset:2868 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v59 /*v827*/, off, s33 offset:2872 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v60 /*v828*/, off, s33 offset:2876 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v61 /*v829*/, off, s33 offset:2880 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v62 /*v830*/, off, s33 offset:2884 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v63 /*v831*/, off, s33 offset:2888 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v64 /*v832*/, off, s33 offset:2892 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v65 /*v833*/, off, s33 offset:2896 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v66 /*v834*/, off, s33 offset:2900 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v67 /*v835*/, off, s33 offset:2904 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v68 /*v836*/, off, s33 offset:2908 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v69 /*v837*/, off, s33 offset:2912 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v70 /*v838*/, off, s33 offset:2916 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v71 /*v839*/, off, s33 offset:2920 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v72 /*v840*/, off, s33 offset:2924 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v73 /*v841*/, off, s33 offset:2928 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v74 /*v842*/, off, s33 offset:2932 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v75 /*v843*/, off, s33 offset:2936 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v76 /*v844*/, off, s33 offset:2940 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v77 /*v845*/, off, s33 offset:2944 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v78 /*v846*/, off, s33 offset:2948 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v79 /*v847*/, off, s33 offset:2952 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v80 /*v848*/, off, s33 offset:2956 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v81 /*v849*/, off, s33 offset:2960 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v82 /*v850*/, off, s33 offset:2964 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v83 /*v851*/, off, s33 offset:2968 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v84 /*v852*/, off, s33 offset:2972 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v85 /*v853*/, off, s33 offset:2976 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v86 /*v854*/, off, s33 offset:2980 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v87 /*v855*/, off, s33 offset:2984 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v88 /*v856*/, off, s33 offset:2988 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v89 /*v857*/, off, s33 offset:2992 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v90 /*v858*/, off, s33 offset:2996 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v91 /*v859*/, off, s33 offset:3000 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v92 /*v860*/, off, s33 offset:3004 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v93 /*v861*/, off, s33 offset:3008 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v94 /*v862*/, off, s33 offset:3012 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v95 /*v863*/, off, s33 offset:3016 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v96 /*v864*/, off, s33 offset:3020 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v97 /*v865*/, off, s33 offset:3024 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v98 /*v866*/, off, s33 offset:3028 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v99 /*v867*/, off, s33 offset:3032 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v868*/, off, s33 offset:3036
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v869*/, off, s33 offset:3040
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v870*/, off, s33 offset:3044
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v871*/, off, s33 offset:3048
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v872*/, off, s33 offset:3052
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v873*/, off, s33 offset:3056
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v874*/, off, s33 offset:3060
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v875*/, off, s33 offset:3064
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v876*/, off, s33 offset:3068
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v877*/, off, s33 offset:3072
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v878*/, off, s33 offset:3076
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v879*/, off, s33 offset:3080
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v880*/, off, s33 offset:3084
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v881*/, off, s33 offset:3088
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v882*/, off, s33 offset:3092
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v883*/, off, s33 offset:3096
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v884*/, off, s33 offset:3100
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v885*/, off, s33 offset:3104
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v886*/, off, s33 offset:3108
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v887*/, off, s33 offset:3112
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v888*/, off, s33 offset:3116
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v889*/, off, s33 offset:3120
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v890*/, off, s33 offset:3124
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v891*/, off, s33 offset:3128
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v892*/, off, s33 offset:3132
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v893*/, off, s33 offset:3136
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v894*/, off, s33 offset:3140
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v895*/, off, s33 offset:3144
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v896*/, off, s33 offset:3148
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v897*/, off, s33 offset:3152
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v898*/, off, s33 offset:3156
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v899*/, off, s33 offset:3160
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v900*/, off, s33 offset:3164
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v901*/, off, s33 offset:3168
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v902*/, off, s33 offset:3172
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v903*/, off, s33 offset:3176
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v904*/, off, s33 offset:3180
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v905*/, off, s33 offset:3184
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v906*/, off, s33 offset:3188
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v907*/, off, s33 offset:3192
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v908*/, off, s33 offset:3196
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v909*/, off, s33 offset:3200
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v910*/, off, s33 offset:3204
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v911*/, off, s33 offset:3208
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v912*/, off, s33 offset:3212
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v913*/, off, s33 offset:3216
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v914*/, off, s33 offset:3220
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v915*/, off, s33 offset:3224
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v916*/, off, s33 offset:3228
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v917*/, off, s33 offset:3232
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v918*/, off, s33 offset:3236
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v919*/, off, s33 offset:3240
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v920*/, off, s33 offset:3244
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v921*/, off, s33 offset:3248
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v922*/, off, s33 offset:3252
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v923*/, off, s33 offset:3256
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v924*/, off, s33 offset:3260
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v925*/, off, s33 offset:3264
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v926*/, off, s33 offset:3268
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v927*/, off, s33 offset:3272
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v928*/, off, s33 offset:3276
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v929*/, off, s33 offset:3280
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v930*/, off, s33 offset:3284
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v100 /*v868*/, off, s33 offset:3036 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v101 /*v869*/, off, s33 offset:3040 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v102 /*v870*/, off, s33 offset:3044 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v103 /*v871*/, off, s33 offset:3048 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v104 /*v872*/, off, s33 offset:3052 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v105 /*v873*/, off, s33 offset:3056 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v106 /*v874*/, off, s33 offset:3060 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v107 /*v875*/, off, s33 offset:3064 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v108 /*v876*/, off, s33 offset:3068 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v109 /*v877*/, off, s33 offset:3072 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v110 /*v878*/, off, s33 offset:3076 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v111 /*v879*/, off, s33 offset:3080 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v112 /*v880*/, off, s33 offset:3084 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v113 /*v881*/, off, s33 offset:3088 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v114 /*v882*/, off, s33 offset:3092 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v115 /*v883*/, off, s33 offset:3096 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v116 /*v884*/, off, s33 offset:3100 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v117 /*v885*/, off, s33 offset:3104 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v118 /*v886*/, off, s33 offset:3108 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v119 /*v887*/, off, s33 offset:3112 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v120 /*v888*/, off, s33 offset:3116 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v121 /*v889*/, off, s33 offset:3120 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v122 /*v890*/, off, s33 offset:3124 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v123 /*v891*/, off, s33 offset:3128 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v124 /*v892*/, off, s33 offset:3132 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v125 /*v893*/, off, s33 offset:3136 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v126 /*v894*/, off, s33 offset:3140 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v127 /*v895*/, off, s33 offset:3144 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v128 /*v896*/, off, s33 offset:3148 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v129 /*v897*/, off, s33 offset:3152 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v130 /*v898*/, off, s33 offset:3156 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v131 /*v899*/, off, s33 offset:3160 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v132 /*v900*/, off, s33 offset:3164 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v133 /*v901*/, off, s33 offset:3168 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v134 /*v902*/, off, s33 offset:3172 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v135 /*v903*/, off, s33 offset:3176 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v136 /*v904*/, off, s33 offset:3180 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v137 /*v905*/, off, s33 offset:3184 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v138 /*v906*/, off, s33 offset:3188 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v139 /*v907*/, off, s33 offset:3192 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v140 /*v908*/, off, s33 offset:3196 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v141 /*v909*/, off, s33 offset:3200 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v142 /*v910*/, off, s33 offset:3204 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v143 /*v911*/, off, s33 offset:3208 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v144 /*v912*/, off, s33 offset:3212 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v145 /*v913*/, off, s33 offset:3216 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v146 /*v914*/, off, s33 offset:3220 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v147 /*v915*/, off, s33 offset:3224 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v148 /*v916*/, off, s33 offset:3228 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v149 /*v917*/, off, s33 offset:3232 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v150 /*v918*/, off, s33 offset:3236 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v151 /*v919*/, off, s33 offset:3240 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v152 /*v920*/, off, s33 offset:3244 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v153 /*v921*/, off, s33 offset:3248 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v154 /*v922*/, off, s33 offset:3252 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v155 /*v923*/, off, s33 offset:3256 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v156 /*v924*/, off, s33 offset:3260 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v157 /*v925*/, off, s33 offset:3264 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v158 /*v926*/, off, s33 offset:3268 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v159 /*v927*/, off, s33 offset:3272 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v160 /*v928*/, off, s33 offset:3276 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v161 /*v929*/, off, s33 offset:3280 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v162 /*v930*/, off, s33 offset:3284 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x3e ; 252-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v931*/, off, s33 offset:3288
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v932*/, off, s33 offset:3292
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v933*/, off, s33 offset:3296
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v934*/, off, s33 offset:3300
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v935*/, off, s33 offset:3304
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v936*/, off, s33 offset:3308
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v937*/, off, s33 offset:3312
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v938*/, off, s33 offset:3316
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v939*/, off, s33 offset:3320
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v940*/, off, s33 offset:3324
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v941*/, off, s33 offset:3328
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v942*/, off, s33 offset:3332
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v943*/, off, s33 offset:3336
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v944*/, off, s33 offset:3340
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v945*/, off, s33 offset:3344
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v946*/, off, s33 offset:3348
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v947*/, off, s33 offset:3352
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v948*/, off, s33 offset:3356
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v949*/, off, s33 offset:3360
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v950*/, off, s33 offset:3364
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v951*/, off, s33 offset:3368
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v952*/, off, s33 offset:3372
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v953*/, off, s33 offset:3376
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v954*/, off, s33 offset:3380
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v955*/, off, s33 offset:3384
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v956*/, off, s33 offset:3388
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v957*/, off, s33 offset:3392
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v958*/, off, s33 offset:3396
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v959*/, off, s33 offset:3400
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v960*/, off, s33 offset:3404
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v961*/, off, s33 offset:3408
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v962*/, off, s33 offset:3412
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v963*/, off, s33 offset:3416
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v964*/, off, s33 offset:3420
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v965*/, off, s33 offset:3424
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v966*/, off, s33 offset:3428
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v967*/, off, s33 offset:3432
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v968*/, off, s33 offset:3436
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v969*/, off, s33 offset:3440
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v970*/, off, s33 offset:3444
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v971*/, off, s33 offset:3448
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v972*/, off, s33 offset:3452
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v973*/, off, s33 offset:3456
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v974*/, off, s33 offset:3460
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v975*/, off, s33 offset:3464
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v976*/, off, s33 offset:3468
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v977*/, off, s33 offset:3472
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v978*/, off, s33 offset:3476
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v979*/, off, s33 offset:3480
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v980*/, off, s33 offset:3484
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v981*/, off, s33 offset:3488
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v982*/, off, s33 offset:3492
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v983*/, off, s33 offset:3496
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v984*/, off, s33 offset:3500
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v985*/, off, s33 offset:3504
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v986*/, off, s33 offset:3508
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v987*/, off, s33 offset:3512
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v988*/, off, s33 offset:3516
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v989*/, off, s33 offset:3520
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v990*/, off, s33 offset:3524
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v991*/, off, s33 offset:3528
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v992*/, off, s33 offset:3532
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v993*/, off, s33 offset:3536
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v163 /*v931*/, off, s33 offset:3288 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v164 /*v932*/, off, s33 offset:3292 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v165 /*v933*/, off, s33 offset:3296 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v166 /*v934*/, off, s33 offset:3300 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v167 /*v935*/, off, s33 offset:3304 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v168 /*v936*/, off, s33 offset:3308 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v169 /*v937*/, off, s33 offset:3312 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v170 /*v938*/, off, s33 offset:3316 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v171 /*v939*/, off, s33 offset:3320 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v172 /*v940*/, off, s33 offset:3324 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v173 /*v941*/, off, s33 offset:3328 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v174 /*v942*/, off, s33 offset:3332 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v175 /*v943*/, off, s33 offset:3336 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v176 /*v944*/, off, s33 offset:3340 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v177 /*v945*/, off, s33 offset:3344 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v178 /*v946*/, off, s33 offset:3348 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v179 /*v947*/, off, s33 offset:3352 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v180 /*v948*/, off, s33 offset:3356 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v181 /*v949*/, off, s33 offset:3360 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v182 /*v950*/, off, s33 offset:3364 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v183 /*v951*/, off, s33 offset:3368 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v184 /*v952*/, off, s33 offset:3372 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v185 /*v953*/, off, s33 offset:3376 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v186 /*v954*/, off, s33 offset:3380 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v187 /*v955*/, off, s33 offset:3384 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v188 /*v956*/, off, s33 offset:3388 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v189 /*v957*/, off, s33 offset:3392 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v190 /*v958*/, off, s33 offset:3396 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v191 /*v959*/, off, s33 offset:3400 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v192 /*v960*/, off, s33 offset:3404 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v193 /*v961*/, off, s33 offset:3408 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v194 /*v962*/, off, s33 offset:3412 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v195 /*v963*/, off, s33 offset:3416 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v196 /*v964*/, off, s33 offset:3420 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v197 /*v965*/, off, s33 offset:3424 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v198 /*v966*/, off, s33 offset:3428 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v199 /*v967*/, off, s33 offset:3432 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v200 /*v968*/, off, s33 offset:3436 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v201 /*v969*/, off, s33 offset:3440 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v202 /*v970*/, off, s33 offset:3444 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v203 /*v971*/, off, s33 offset:3448 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v204 /*v972*/, off, s33 offset:3452 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v205 /*v973*/, off, s33 offset:3456 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v206 /*v974*/, off, s33 offset:3460 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v207 /*v975*/, off, s33 offset:3464 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v208 /*v976*/, off, s33 offset:3468 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v209 /*v977*/, off, s33 offset:3472 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v210 /*v978*/, off, s33 offset:3476 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v211 /*v979*/, off, s33 offset:3480 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v212 /*v980*/, off, s33 offset:3484 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v213 /*v981*/, off, s33 offset:3488 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v214 /*v982*/, off, s33 offset:3492 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v215 /*v983*/, off, s33 offset:3496 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v216 /*v984*/, off, s33 offset:3500 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v217 /*v985*/, off, s33 offset:3504 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v218 /*v986*/, off, s33 offset:3508 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v219 /*v987*/, off, s33 offset:3512 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v220 /*v988*/, off, s33 offset:3516 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v221 /*v989*/, off, s33 offset:3520 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v222 /*v990*/, off, s33 offset:3524 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v223 /*v991*/, off, s33 offset:3528 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v224 /*v992*/, off, s33 offset:3532 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v225 /*v993*/, off, s33 offset:3536 nv
 ; GFX1250-DAGISEL-NEXT:    s_clause 0x1d ; 120-byte Folded Reload
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v994*/, off, s33 offset:3540
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v995*/, off, s33 offset:3544
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v996*/, off, s33 offset:3548
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v997*/, off, s33 offset:3552
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v998*/, off, s33 offset:3556
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v999*/, off, s33 offset:3560
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v1000*/, off, s33 offset:3564
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v1001*/, off, s33 offset:3568
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v1002*/, off, s33 offset:3572
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v1003*/, off, s33 offset:3576
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v1004*/, off, s33 offset:3580
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v1005*/, off, s33 offset:3584
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v1006*/, off, s33 offset:3588
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v1007*/, off, s33 offset:3592
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v1008*/, off, s33 offset:3596
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v1009*/, off, s33 offset:3600
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v1010*/, off, s33 offset:3604
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v1011*/, off, s33 offset:3608
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v1012*/, off, s33 offset:3612
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v1013*/, off, s33 offset:3616
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v1014*/, off, s33 offset:3620
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v1015*/, off, s33 offset:3624
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v1016*/, off, s33 offset:3628
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v1017*/, off, s33 offset:3632
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v1018*/, off, s33 offset:3636
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v1019*/, off, s33 offset:3640
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v1020*/, off, s33 offset:3644
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v1021*/, off, s33 offset:3648
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v1022*/, off, s33 offset:3652
-; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v1023*/, off, s33 offset:3656
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v226 /*v994*/, off, s33 offset:3540 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v227 /*v995*/, off, s33 offset:3544 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v228 /*v996*/, off, s33 offset:3548 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v229 /*v997*/, off, s33 offset:3552 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v230 /*v998*/, off, s33 offset:3556 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v231 /*v999*/, off, s33 offset:3560 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v232 /*v1000*/, off, s33 offset:3564 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v233 /*v1001*/, off, s33 offset:3568 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v234 /*v1002*/, off, s33 offset:3572 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v235 /*v1003*/, off, s33 offset:3576 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v236 /*v1004*/, off, s33 offset:3580 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v237 /*v1005*/, off, s33 offset:3584 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v238 /*v1006*/, off, s33 offset:3588 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v239 /*v1007*/, off, s33 offset:3592 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v240 /*v1008*/, off, s33 offset:3596 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v241 /*v1009*/, off, s33 offset:3600 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v242 /*v1010*/, off, s33 offset:3604 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v243 /*v1011*/, off, s33 offset:3608 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v244 /*v1012*/, off, s33 offset:3612 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v245 /*v1013*/, off, s33 offset:3616 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v246 /*v1014*/, off, s33 offset:3620 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v247 /*v1015*/, off, s33 offset:3624 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v248 /*v1016*/, off, s33 offset:3628 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v249 /*v1017*/, off, s33 offset:3632 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v250 /*v1018*/, off, s33 offset:3636 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v251 /*v1019*/, off, s33 offset:3640 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v252 /*v1020*/, off, s33 offset:3644 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v253 /*v1021*/, off, s33 offset:3648 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v254 /*v1022*/, off, s33 offset:3652 nv
+; GFX1250-DAGISEL-NEXT:    scratch_load_b32 v255 /*v1023*/, off, s33 offset:3656 nv
 ; GFX1250-DAGISEL-NEXT:    s_wait_xcnt 0x0
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 exec_lo, s4
 ; GFX1250-DAGISEL-NEXT:    s_mov_b32 s33, s0

>From 14673b551e17ef9f4a0cb5add23be91becb97355 Mon Sep 17 00:00:00 2001
From: pvanhout <pierre.vanhoutryve at amd.com>
Date: Tue, 3 Feb 2026 14:34:56 +0100
Subject: [PATCH 2/2] Rename to MOThreadPrivate

---
 llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp     |    2 +-
 .../AMDGPU/accvgpr-spill-scc-clobber.mir      | 8628 ++++++++---------
 llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir  |   16 +-
 .../av-spill-expansion-with-machine-cp.mir    |    8 +-
 .../AMDGPU/av_spill_cross_bb_usage.mir        |   68 +-
 .../AMDGPU/branch-relax-indirect-branch.mir   |    8 +-
 .../AMDGPU/branch-relax-no-terminators.mir    |    8 +-
 .../CodeGen/AMDGPU/bug-undef-spilled-agpr.mir |    8 +-
 .../eliminate-frame-index-s-mov-b32.mir       |  333 +-
 .../eliminate-frame-index-v-add-co-u32.mir    |   36 +-
 llvm/test/CodeGen/AMDGPU/frame-index.mir      |  280 +-
 .../CodeGen/AMDGPU/insert-waitcnts-crash.ll   |    4 +-
 ...egrewriter-live-out-undef-subregisters.mir |    4 +-
 .../AMDGPU/kernel-mubuf-with-voffset.mir      |    2 +-
 ...al-regcopy-and-spill-missed-at-regalloc.ll |    4 +-
 .../AMDGPU/pei-amdgpu-cs-chain-preserve.mir   |   12 +-
 .../CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir    |    8 +-
 .../CodeGen/AMDGPU/pei-build-av-spill.mir     | 2520 ++---
 .../AMDGPU/pei-build-spill-partial-agpr.mir   |  124 +-
 llvm/test/CodeGen/AMDGPU/pei-build-spill.mir  | 1804 ++--
 .../AMDGPU/pei-reg-scavenger-position.mir     |    4 +-
 .../AMDGPU/pei-scavenge-vgpr-spill.mir        |   12 +-
 .../AMDGPU/pei-vgpr-block-spill-csr.mir       |   68 +-
 .../regalloc-introduces-copy-sgpr-to-agpr.mir |  136 +-
 .../CodeGen/AMDGPU/same-slot-agpr-sgpr.mir    |   10 +-
 .../AMDGPU/sgpr-spill-overlap-wwm-reserve.mir |   20 +-
 .../AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir |  104 +-
 .../AMDGPU/sgpr-spill-vmem-large-frame.mir    |   12 +-
 llvm/test/CodeGen/AMDGPU/sgpr-spill.mir       |  360 +-
 .../AMDGPU/spill-agpr-partially-undef.mir     |   12 +-
 llvm/test/CodeGen/AMDGPU/spill-agpr.mir       |    8 +-
 .../AMDGPU/spill-reg-tuple-super-reg-use.mir  |   24 +-
 .../AMDGPU/spill-restore-partial-copy.mir     |    8 +-
 .../CodeGen/AMDGPU/spill-special-sgpr.mir     |   54 +-
 .../CodeGen/AMDGPU/spill-to-agpr-partial.mir  |   24 +-
 llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir   |   48 +-
 llvm/test/CodeGen/AMDGPU/spillv16.mir         |   16 +-
 ...d-op-for-wwm-scratch-reg-spill-restore.mir |   20 +-
 ...tor-spill-restore-to-other-vector-type.mir |   24 +-
 .../CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir |  224 +-
 llvm/test/CodeGen/AMDGPU/vgpr-spill.mir       |   28 +-
 .../AMDGPU/whole-wave-functions-pei.mir       |   80 +-
 42 files changed, 7586 insertions(+), 7587 deletions(-)

diff --git a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
index 1e7ec1832db25..b42111c074887 100644
--- a/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
+++ b/llvm/lib/Target/AMDGPU/SIRegisterInfo.cpp
@@ -1840,7 +1840,7 @@ void SIRegisterInfo::buildSpillLoadStore(
     // used for spills will not be used outside the thread.
     MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RegOffset);
     MachineMemOperand *NewMMO = MF->getMachineMemOperand(
-        PInfo, MMO->getFlags() | MONonVolatile, RemEltSize,
+        PInfo, MMO->getFlags() | MOThreadPrivate, RemEltSize,
         commonAlignment(Alignment, RegOffset));
 
     auto MIB =
diff --git a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
index bc2446bc40cf4..f4b0568c8121d 100644
--- a/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/accvgpr-spill-scc-clobber.mir
@@ -27,11 +27,11 @@ body:             |
   ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX908-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
   ; GFX908-NEXT:   $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.1:
@@ -97,187 +97,187 @@ body:             |
   ; GFX90A-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -289,182 +289,182 @@ body:             |
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
-  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -521,12 +521,12 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.1:
@@ -592,188 +592,188 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   $agpr0 = SCRATCH_LOAD_DWORD killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr0 = SCRATCH_LOAD_DWORD killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:
@@ -785,182 +785,182 @@ body:             |
   ; GFX90A-FLATSCR-NEXT: bb.2:
   ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
-  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -1045,13 +1045,13 @@ body:             |
   ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX908-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
   ; GFX908-NEXT:   $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
-  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
+  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1 + 4, addrspace 5)
   ; GFX908-NEXT:   $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.1:
@@ -1117,188 +1117,188 @@ body:             |
   ; GFX90A-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   $agpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   $agpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1 + 4, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -1310,182 +1310,182 @@ body:             |
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
-  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -1542,14 +1542,14 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
-  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1 + 4, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.1:
@@ -1615,188 +1615,188 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.1, align 4, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.1, align 4, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:
@@ -1808,182 +1808,182 @@ body:             |
   ; GFX90A-FLATSCR-NEXT: bb.2:
   ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
-  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -2068,15 +2068,15 @@ body:             |
   ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX908-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
   ; GFX908-NEXT:   $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
-  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
+  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1 + 4, addrspace 5)
   ; GFX908-NEXT:   $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 8, addrspace 5)
+  ; GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1 + 8, addrspace 5)
   ; GFX908-NEXT:   $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.1:
@@ -2142,189 +2142,189 @@ body:             |
   ; GFX90A-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   $agpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
-  ; GFX90A-NEXT:   $agpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 8, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   $agpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1 + 4, addrspace 5)
+  ; GFX90A-NEXT:   $agpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1 + 8, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -2336,182 +2336,182 @@ body:             |
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
-  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -2568,16 +2568,16 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
-  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1 + 4, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 8, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD $vgpr1, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1 + 8, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.1:
@@ -2643,188 +2643,188 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.1, align 4, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s96) from %stack.1, align 4, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:
@@ -2836,182 +2836,182 @@ body:             |
   ; GFX90A-FLATSCR-NEXT: bb.2:
   ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
-  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -3096,11 +3096,11 @@ body:             |
   ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX908-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; GFX908-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.1:
@@ -3166,187 +3166,187 @@ body:             |
   ; GFX90A-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -3358,182 +3358,182 @@ body:             |
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   liveins: $agpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
-  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -3590,12 +3590,12 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $agpr0
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.1:
@@ -3661,188 +3661,188 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $agpr0, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $agpr0, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:
@@ -3854,182 +3854,182 @@ body:             |
   ; GFX90A-FLATSCR-NEXT: bb.2:
   ; GFX90A-FLATSCR-NEXT:   liveins: $agpr0, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
-  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -4113,13 +4113,13 @@ body:             |
   ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX908-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; GFX908-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
   ; GFX908-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
-  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.1 + 4, addrspace 5)
+  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.1:
@@ -4185,188 +4185,188 @@ body:             |
   ; GFX90A-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr1, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr1, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.1 + 4, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -4378,182 +4378,182 @@ body:             |
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
-  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -4610,14 +4610,14 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $agpr0_agpr1
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
-  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 4, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.1 + 4, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.1:
@@ -4683,188 +4683,188 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $agpr0_agpr1, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.1, align 4, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $agpr0_agpr1, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.1, align 4, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:
@@ -4876,182 +4876,182 @@ body:             |
   ; GFX90A-FLATSCR-NEXT: bb.2:
   ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
-  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -5134,15 +5134,15 @@ body:             |
   ; GFX908-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX908-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; GFX908-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
   ; GFX908-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1 + 4, addrspace 5)
   ; GFX908-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
-  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 8, addrspace 5)
-  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX908-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.1 + 8, addrspace 5)
+  ; GFX908-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX908-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-NEXT: {{  $}}
   ; GFX908-NEXT: bb.1:
@@ -5208,189 +5208,189 @@ body:             |
   ; GFX90A-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr80, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr81, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr82, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr83, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr84, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr85, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr86, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr87, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr88, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr89, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr90, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr91, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr92, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr93, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr94, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr95, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr96, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr97, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr98, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr99, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr100, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr101, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr102, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr103, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr104, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr105, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr106, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr107, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr108, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr109, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr110, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr111, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr112, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr113, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr114, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr115, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr116, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr117, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr118, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr119, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr120, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr121, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr122, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr123, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr124, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr125, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr126, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr127, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr128, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr129, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr130, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr131, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr132, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr133, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr134, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr135, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr136, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr137, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr138, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr139, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr140, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr141, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr142, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr143, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr144, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr145, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr146, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr147, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr148, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr149, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr150, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr151, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr152, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr153, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr154, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr155, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr156, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr157, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr158, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr159, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr160, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr161, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr162, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr163, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr164, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr165, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr166, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr167, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr168, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr169, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr170, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr171, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr172, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr173, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr174, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr175, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr176, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr177, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr178, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr179, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr180, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr181, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr182, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr183, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr184, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr185, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr186, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr187, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr188, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr189, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr190, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr191, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr192, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr193, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr194, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr195, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr196, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr197, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr198, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr199, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr200, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr201, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr202, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr203, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr204, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr205, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr206, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr207, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr208, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr209, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr210, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr211, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr212, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr213, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr214, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr215, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr216, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr217, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr218, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr219, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr220, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr221, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr222, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr223, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr224, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr225, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr226, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr227, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr228, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr229, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr230, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr231, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr232, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr233, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr234, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr235, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr236, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr237, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr238, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr239, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr240, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr241, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr242, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr243, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr244, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr245, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr246, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr247, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr248, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr249, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr250, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr251, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr252, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr253, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr254, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr255, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-NEXT:   $vgpr40 = V_MOV_B32_e32 8904, implicit $exec
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr1, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
-  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr2, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 8, addrspace 5)
-  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr0, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr1, $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1 + 4, addrspace 5)
+  ; GFX90A-NEXT:   BUFFER_STORE_DWORD_OFFEN $agpr2, killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.1 + 8, addrspace 5)
+  ; GFX90A-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 704, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-NEXT: {{  $}}
   ; GFX90A-NEXT: bb.1:
@@ -5402,182 +5402,182 @@ body:             |
   ; GFX90A-NEXT: bb.2:
   ; GFX90A-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-NEXT: {{  $}}
-  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-NEXT:   $agpr255 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-NEXT:   $agpr254 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-NEXT:   $agpr253 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-NEXT:   $agpr252 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-NEXT:   $agpr251 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-NEXT:   $agpr250 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-NEXT:   $agpr249 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-NEXT:   $agpr248 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-NEXT:   $agpr247 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-NEXT:   $agpr246 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-NEXT:   $agpr245 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-NEXT:   $agpr244 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-NEXT:   $agpr243 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-NEXT:   $agpr242 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-NEXT:   $agpr241 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-NEXT:   $agpr240 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-NEXT:   $agpr239 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-NEXT:   $agpr238 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-NEXT:   $agpr237 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-NEXT:   $agpr236 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-NEXT:   $agpr235 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-NEXT:   $agpr234 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-NEXT:   $agpr233 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-NEXT:   $agpr232 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-NEXT:   $agpr231 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-NEXT:   $agpr230 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-NEXT:   $agpr229 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-NEXT:   $agpr228 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-NEXT:   $agpr227 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-NEXT:   $agpr226 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-NEXT:   $agpr225 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-NEXT:   $agpr224 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-NEXT:   $agpr223 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 128, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-NEXT:   $agpr222 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-NEXT:   $agpr221 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 136, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-NEXT:   $agpr220 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 140, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-NEXT:   $agpr219 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 144, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-NEXT:   $agpr218 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 148, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-NEXT:   $agpr217 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 152, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-NEXT:   $agpr216 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 156, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-NEXT:   $agpr215 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 160, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-NEXT:   $agpr214 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 164, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-NEXT:   $agpr213 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 168, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-NEXT:   $agpr212 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 172, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-NEXT:   $agpr211 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 176, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-NEXT:   $agpr210 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 180, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-NEXT:   $agpr209 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 184, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-NEXT:   $agpr208 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 188, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-NEXT:   $agpr207 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 192, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-NEXT:   $agpr206 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 196, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-NEXT:   $agpr205 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 200, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-NEXT:   $agpr204 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 204, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-NEXT:   $agpr203 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 208, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-NEXT:   $agpr202 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 212, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-NEXT:   $agpr201 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 216, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-NEXT:   $agpr200 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 220, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-NEXT:   $agpr199 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 224, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-NEXT:   $agpr198 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 228, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-NEXT:   $agpr197 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 232, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-NEXT:   $agpr196 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 236, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-NEXT:   $agpr195 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 240, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-NEXT:   $agpr194 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 244, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-NEXT:   $agpr193 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 248, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-NEXT:   $agpr192 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 252, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-NEXT:   $agpr191 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 256, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-NEXT:   $agpr190 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 260, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-NEXT:   $agpr189 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 264, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-NEXT:   $agpr188 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 268, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-NEXT:   $agpr187 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 272, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-NEXT:   $agpr186 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 276, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-NEXT:   $agpr185 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 280, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-NEXT:   $agpr184 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 284, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-NEXT:   $agpr183 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 288, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-NEXT:   $agpr182 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 292, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-NEXT:   $agpr181 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 296, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-NEXT:   $agpr180 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 300, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-NEXT:   $agpr179 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 304, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-NEXT:   $agpr178 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 308, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-NEXT:   $agpr177 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 312, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-NEXT:   $agpr176 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 316, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-NEXT:   $agpr175 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 320, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-NEXT:   $agpr174 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 324, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-NEXT:   $agpr173 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 328, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-NEXT:   $agpr172 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 332, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-NEXT:   $agpr171 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 336, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-NEXT:   $agpr170 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 340, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-NEXT:   $agpr169 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 344, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-NEXT:   $agpr168 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 348, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-NEXT:   $agpr167 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 352, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-NEXT:   $agpr166 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 356, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-NEXT:   $agpr165 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 360, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-NEXT:   $agpr164 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 364, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-NEXT:   $agpr163 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 368, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-NEXT:   $agpr162 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 372, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-NEXT:   $agpr161 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 376, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-NEXT:   $agpr160 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 380, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-NEXT:   $agpr159 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 384, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-NEXT:   $agpr158 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 388, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-NEXT:   $agpr157 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 392, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-NEXT:   $agpr156 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 396, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-NEXT:   $agpr155 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 400, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-NEXT:   $agpr154 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 404, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-NEXT:   $agpr153 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 408, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-NEXT:   $agpr152 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 412, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-NEXT:   $agpr151 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 416, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-NEXT:   $agpr150 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 420, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-NEXT:   $agpr149 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 424, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-NEXT:   $agpr148 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 428, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-NEXT:   $agpr147 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 432, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-NEXT:   $agpr146 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 436, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-NEXT:   $agpr145 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 440, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-NEXT:   $agpr144 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 444, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-NEXT:   $agpr143 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 448, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-NEXT:   $agpr142 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 452, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-NEXT:   $agpr141 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 456, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-NEXT:   $agpr140 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 460, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-NEXT:   $agpr139 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 464, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-NEXT:   $agpr138 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 468, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-NEXT:   $agpr137 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 472, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-NEXT:   $agpr136 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 476, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-NEXT:   $agpr135 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 480, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-NEXT:   $agpr134 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 484, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-NEXT:   $agpr133 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 488, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-NEXT:   $agpr132 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 492, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-NEXT:   $agpr131 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 496, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-NEXT:   $agpr130 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 500, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-NEXT:   $agpr129 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 504, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-NEXT:   $agpr128 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 508, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-NEXT:   $agpr127 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 512, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-NEXT:   $agpr126 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 516, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-NEXT:   $agpr125 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 520, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-NEXT:   $agpr124 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 524, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-NEXT:   $agpr123 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 528, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-NEXT:   $agpr122 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 532, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-NEXT:   $agpr121 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 536, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-NEXT:   $agpr120 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 540, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-NEXT:   $agpr119 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 544, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-NEXT:   $agpr118 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 548, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-NEXT:   $agpr117 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 552, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-NEXT:   $agpr116 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 556, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-NEXT:   $agpr115 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 560, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-NEXT:   $agpr114 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 564, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-NEXT:   $agpr113 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 568, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-NEXT:   $agpr112 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 572, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-NEXT:   $agpr111 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 576, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-NEXT:   $agpr110 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 580, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-NEXT:   $agpr109 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 584, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-NEXT:   $agpr108 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 588, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-NEXT:   $agpr107 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 592, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-NEXT:   $agpr106 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 596, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-NEXT:   $agpr105 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 600, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-NEXT:   $agpr104 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 604, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-NEXT:   $agpr103 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 608, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-NEXT:   $agpr102 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 612, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-NEXT:   $agpr101 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 616, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-NEXT:   $agpr100 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 620, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-NEXT:   $agpr99 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 624, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-NEXT:   $agpr98 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 628, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-NEXT:   $agpr97 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 632, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-NEXT:   $agpr96 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 636, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-NEXT:   $agpr95 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 640, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-NEXT:   $agpr94 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 644, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-NEXT:   $agpr93 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 648, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-NEXT:   $agpr92 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 652, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-NEXT:   $agpr91 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 656, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-NEXT:   $agpr90 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 660, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-NEXT:   $agpr89 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 664, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-NEXT:   $agpr88 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 668, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-NEXT:   $agpr87 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 672, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-NEXT:   $agpr86 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 676, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-NEXT:   $agpr85 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 680, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-NEXT:   $agpr84 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 684, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-NEXT:   $agpr83 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 688, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-NEXT:   $agpr82 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 692, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-NEXT:   $agpr81 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 696, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-NEXT:   $agpr80 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 700, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
@@ -5634,16 +5634,16 @@ body:             |
   ; GFX908-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255, $agpr0_agpr1
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
   ; GFX908-FLATSCR-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1 + 4, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   $vgpr0 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
-  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 8, addrspace 5)
-  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, $vgpr1, 8, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.1 + 8, addrspace 5)
+  ; GFX908-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX908-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX908-FLATSCR-NEXT: {{  $}}
   ; GFX908-FLATSCR-NEXT: bb.1:
@@ -5709,188 +5709,188 @@ body:             |
   ; GFX90A-FLATSCR-NEXT:   $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr77, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr78, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr79, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.50, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr80, $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr81, $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr82, $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr83, $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr84, $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr85, $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr86, $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr87, $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr88, $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr89, $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr90, $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr91, $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr92, $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr93, $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr94, $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr95, $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr96, $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr97, $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr98, $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr99, $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr100, $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr101, $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr102, $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr103, $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr104, $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr105, $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr106, $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr107, $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr108, $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr109, $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr110, $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr111, $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr112, $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr113, $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr114, $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr115, $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr116, $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr117, $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr118, $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr119, $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr120, $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr121, $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr122, $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr123, $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr124, $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr125, $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr126, $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr127, $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr128, $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr129, $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr130, $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr131, $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr132, $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr133, $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr134, $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr135, $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr136, $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr137, $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr138, $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr139, $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr140, $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr141, $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr142, $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr143, $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr144, $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr145, $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr146, $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr147, $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr148, $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr149, $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr150, $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr151, $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr152, $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr153, $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr154, $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr155, $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr156, $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr157, $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr158, $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr159, $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr160, $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr161, $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr162, $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr163, $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr164, $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr165, $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr166, $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr167, $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr168, $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr169, $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr170, $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr171, $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr172, $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr173, $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr174, $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr175, $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr176, $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr177, $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr178, $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr179, $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr180, $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr181, $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr182, $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr183, $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr184, $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr185, $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr186, $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr187, $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr188, $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr189, $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr190, $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr191, $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr192, $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr193, $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr194, $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr195, $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr196, $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr197, $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr198, $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr199, $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr200, $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr201, $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr202, $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr203, $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr204, $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr205, $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr206, $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr207, $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr208, $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr209, $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr210, $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr211, $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr212, $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr213, $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr214, $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr215, $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr216, $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr217, $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr218, $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr219, $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr220, $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr221, $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr222, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr223, $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr224, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr225, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr226, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr227, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr228, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr229, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr230, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr231, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr232, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr233, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr234, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr235, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr236, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr237, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr238, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr239, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr240, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr241, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr242, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr243, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr244, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr245, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr246, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr247, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr248, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr249, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr250, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr251, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr252, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr253, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr254, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $agpr255, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.225, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $vgpr40 = V_ADD_U32_e32 8904, $vgpr40, implicit $exec
-  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX3 $agpr0_agpr1_agpr2, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.1, align 4, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.226, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX3 $agpr0_agpr1_agpr2, killed $vgpr40, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s96) into %stack.1, align 4, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 704, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.226, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX90A-FLATSCR-NEXT: {{  $}}
   ; GFX90A-FLATSCR-NEXT: bb.1:
@@ -5902,182 +5902,182 @@ body:             |
   ; GFX90A-FLATSCR-NEXT: bb.2:
   ; GFX90A-FLATSCR-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31, $vgpr32, $vgpr33, $vgpr34, $vgpr35, $vgpr36, $vgpr37, $vgpr38, $vgpr39, $vgpr48, $vgpr49, $vgpr50, $vgpr51, $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, 
$vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-FLATSCR-NEXT: {{  $}}
-  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.225, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.224, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.223, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.222, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.221, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.220, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.219, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.218, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.217, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.216, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.215, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.214, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.213, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.212, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.211, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.210, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.209, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.208, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.207, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.206, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.205, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.204, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.203, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.202, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.201, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.200, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.199, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.198, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.197, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.196, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.195, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.194, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.193, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.192, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.191, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.190, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.189, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.188, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.187, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.186, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.185, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.184, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.183, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.182, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.181, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.180, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.179, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.178, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.177, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.176, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.175, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.174, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.173, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.172, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.171, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.170, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.169, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.168, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.167, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.166, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.165, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.164, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.163, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.162, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.161, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.160, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.159, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.158, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.157, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.156, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.155, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.154, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.153, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.152, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.151, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.150, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.149, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.148, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.147, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.146, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.145, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.144, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.143, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.142, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.141, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.140, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.139, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.138, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.137, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.136, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.135, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.134, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.133, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.132, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.131, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.130, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.129, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.128, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.127, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.126, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.125, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.124, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.123, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.122, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.121, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.120, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.119, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.118, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.117, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.116, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.115, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.114, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.113, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.112, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.111, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.110, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.109, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.108, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.107, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.106, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.105, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.104, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.103, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.102, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.101, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.100, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.99, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.98, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.97, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.96, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.95, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.94, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.93, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.92, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.91, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.90, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.89, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.88, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.87, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.86, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.85, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.84, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.83, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.82, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.81, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.80, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.79, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.78, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.77, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.76, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.75, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.74, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.68, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.67, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.66, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.65, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.64, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.63, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.62, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.61, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.60, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.59, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.58, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.57, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.56, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.55, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.54, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.53, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.52, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.51, addrspace 5)
-  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.50, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr255 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.225, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr254 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.224, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr253 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.223, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr252 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.222, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr251 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.221, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr250 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.220, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr249 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.219, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr248 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.218, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr247 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.217, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr246 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.216, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr245 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.215, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr244 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.214, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr243 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.213, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr242 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.212, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr241 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.211, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr240 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.210, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr239 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.209, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr238 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.208, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr237 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.207, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr236 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.206, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr235 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.205, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr234 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.204, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr233 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.203, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr232 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.202, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr231 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.201, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr230 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.200, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr229 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.199, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr228 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.198, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr227 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.197, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr226 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.196, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr225 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.195, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr224 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.194, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr223 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 128, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.193, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr222 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.192, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr221 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 136, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.191, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr220 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 140, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.190, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr219 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 144, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.189, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr218 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 148, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.188, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr217 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 152, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.187, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr216 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 156, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.186, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr215 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.185, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr214 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 164, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.184, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr213 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 168, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.183, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr212 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 172, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.182, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr211 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 176, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.181, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr210 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 180, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.180, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr209 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 184, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.179, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr208 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 188, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.178, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr207 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 192, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.177, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr206 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 196, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.176, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr205 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 200, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.175, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr204 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 204, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.174, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr203 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 208, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.173, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr202 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 212, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.172, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr201 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 216, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.171, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr200 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 220, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.170, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr199 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 224, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.169, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr198 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 228, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.168, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr197 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 232, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.167, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr196 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 236, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.166, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr195 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 240, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.165, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr194 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 244, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.164, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr193 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 248, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.163, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 252, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.162, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 256, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.161, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr190 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 260, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.160, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr189 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 264, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.159, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr188 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 268, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.158, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr187 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 272, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.157, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr186 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 276, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.156, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr185 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 280, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.155, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr184 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 284, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.154, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr183 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 288, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.153, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr182 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 292, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.152, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr181 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 296, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.151, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr180 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 300, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.150, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr179 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 304, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.149, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr178 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 308, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.148, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr177 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 312, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.147, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr176 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 316, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.146, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr175 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 320, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.145, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr174 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 324, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.144, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr173 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 328, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.143, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr172 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 332, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.142, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr171 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 336, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.141, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr170 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 340, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.140, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr169 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 344, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.139, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr168 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 348, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.138, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr167 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 352, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.137, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr166 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 356, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.136, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr165 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 360, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.135, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr164 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 364, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.134, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr163 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 368, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.133, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr162 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 372, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.132, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr161 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 376, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.131, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr160 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 380, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.130, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr159 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 384, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.129, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr158 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 388, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.128, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr157 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 392, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.127, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr156 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 396, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.126, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr155 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 400, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.125, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr154 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 404, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.124, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr153 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 408, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.123, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr152 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 412, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.122, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr151 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 416, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.121, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr150 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 420, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.120, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr149 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 424, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.119, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr148 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 428, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.118, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr147 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 432, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.117, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr146 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 436, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.116, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr145 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 440, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.115, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr144 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 444, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.114, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr143 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 448, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.113, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr142 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 452, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.112, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr141 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 456, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.111, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr140 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 460, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.110, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr139 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 464, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.109, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr138 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 468, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.108, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr137 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 472, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.107, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr136 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 476, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.106, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr135 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 480, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.105, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr134 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 484, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.104, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr133 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 488, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.103, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr132 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 492, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.102, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr131 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 496, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.101, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr130 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 500, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.100, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr129 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 504, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.99, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr128 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 508, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.98, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr127 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 512, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.97, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr126 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 516, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.96, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr125 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 520, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.95, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr124 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 524, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.94, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr123 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 528, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.93, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr122 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 532, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.92, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr121 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 536, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.91, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr120 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 540, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.90, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr119 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 544, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.89, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr118 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 548, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.88, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr117 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 552, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.87, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr116 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 556, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.86, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr115 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 560, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.85, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr114 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 564, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.84, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr113 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 568, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.83, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr112 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 572, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.82, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr111 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 576, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.81, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr110 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 580, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.80, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr109 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 584, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.79, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr108 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 588, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.78, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr107 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 592, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.77, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr106 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 596, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.76, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr105 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 600, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.75, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr104 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 604, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.74, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr103 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 608, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.73, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr102 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 612, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.72, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr101 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 616, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.71, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr100 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 620, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.70, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr99 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 624, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.69, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr98 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 628, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.68, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr97 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 632, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.67, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr96 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 636, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.66, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr95 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 640, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.65, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr94 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 644, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.64, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr93 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 648, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.63, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr92 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 652, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.62, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr91 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 656, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.61, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr90 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 660, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.60, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr89 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 664, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.59, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr88 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 668, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.58, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr87 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 672, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.57, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr86 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 676, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.56, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr85 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 680, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.55, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr84 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 684, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.54, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr83 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 688, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.53, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr82 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 692, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.52, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr81 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 696, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.51, addrspace 5)
+  ; GFX90A-FLATSCR-NEXT:   $agpr80 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 700, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.50, addrspace 5)
   ; GFX90A-FLATSCR-NEXT:   $agpr79 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr78 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec
   ; GFX90A-FLATSCR-NEXT:   $agpr77 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir b/llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir
index fd0556d856c3d..973702ff9479f 100644
--- a/llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/agpr-spill-copy.mir
@@ -21,18 +21,18 @@ body:             |
     ; GFX942-NEXT: renamable $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27 = IMPLICIT_DEF
     ; GFX942-NEXT: renamable $agpr28_agpr29 = IMPLICIT_DEF
     ; GFX942-NEXT: renamable $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; GFX942-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 32, align 4, addrspace 5)
     ; GFX942-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX942-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; GFX942-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr12_vgpr13, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s64) into %stack.0 + 48, align 4, addrspace 5)
-    ; GFX942-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; GFX942-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; GFX942-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; GFX942-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr12_vgpr13, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s64) into %stack.0 + 48, align 4, addrspace 5)
+    ; GFX942-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; GFX942-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; GFX942-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 32, align 4, addrspace 5)
     ; GFX942-NEXT: $agpr15 = COPY $agpr30, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
     ; GFX942-NEXT: $agpr14 = COPY $agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; GFX942-NEXT: $agpr12_agpr13 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0 + 48, align 4, addrspace 5)
+    ; GFX942-NEXT: $agpr12_agpr13 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.0 + 48, align 4, addrspace 5)
     ; GFX942-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
     renamable $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27 = IMPLICIT_DEF
     renamable $agpr28_agpr29 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir b/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir
index 59d3c0140110e..9ce869ee08324 100644
--- a/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir
+++ b/llvm/test/CodeGen/AMDGPU/av-spill-expansion-with-machine-cp.mir
@@ -70,9 +70,9 @@ body: |
     ; GFX908-PEI-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = IMPLICIT_DEF
     ; GFX908-PEI-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = IMPLICIT_DEF
     ; GFX908-PEI-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-    ; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; GFX908-PEI-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
-    ; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; GFX908-PEI-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; GFX908-PEI-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
     ; GFX908-PEI-NEXT: S_ENDPGM 0
     ;
@@ -86,9 +86,9 @@ body: |
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9 = IMPLICIT_DEF
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = IMPLICIT_DEF
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-    ; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr40 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
-    ; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; GFX908-PEI-MACHINECP-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; GFX908-PEI-MACHINECP-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
     ; GFX908-PEI-MACHINECP-NEXT: S_ENDPGM 0
     renamable $agpr0 = COPY renamable $vgpr0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
index 0cf5fe03c09ea..f945766d69cf0 100644
--- a/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
+++ b/llvm/test/CodeGen/AMDGPU/av_spill_cross_bb_usage.mir
@@ -28,20 +28,20 @@ body:             |
   ; GCN-NEXT:   liveins: $sgpr30, $sgpr31, $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $sgpr30_sgpr31
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
   ; GCN-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.5, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.6, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.7, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.8, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.10, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.11, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.12, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.13, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.14, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.15, addrspace 5)
   ; GCN-NEXT:   renamable $vgpr44 = COPY $vgpr13, implicit $exec
   ; GCN-NEXT:   renamable $vgpr43 = COPY $vgpr12, implicit $exec
   ; GCN-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit undef $scc
@@ -64,17 +64,17 @@ body:             |
   ; GCN-NEXT:   renamable $sgpr16_sgpr17 = IMPLICIT_DEF
   ; GCN-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40, implicit-def $sgpr30_sgpr31, implicit $sgpr30_sgpr31
   ; GCN-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40, implicit $sgpr30_sgpr31
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15, implicit $vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec, implicit killed $vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr10_vgpr11, implicit $vgpr10_vgpr11 :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec, implicit killed $vgpr10_vgpr11 :: ("amdgpu-non-volatile" store (s32) into %stack.2 + 4, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15, implicit $vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec, implicit killed $vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s32) into %stack.1 + 4, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr10_vgpr11, implicit $vgpr10_vgpr11 :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec, implicit killed $vgpr10_vgpr11 :: ("amdgpu-thread-private" store (s32) into %stack.2 + 4, addrspace 5)
   ; GCN-NEXT:   dead $sgpr30_sgpr31 = SI_CALL killed renamable $sgpr16_sgpr17, 0, csr_amdgpu, implicit-def dead $vgpr0
-  ; GCN-NEXT:   $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-  ; GCN-NEXT:   $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
+  ; GCN-NEXT:   $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec, implicit-def $vgpr14_vgpr15 :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+  ; GCN-NEXT:   $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1 + 4, addrspace 5)
   ; GCN-NEXT:   renamable $vgpr0_vgpr1 = nofpexcept V_FMA_F64_e64 0, killed $vgpr45_vgpr46, 0, killed $vgpr41_vgpr42, 0, killed $vgpr60_vgpr61, 0, 0, implicit $mode, implicit $exec
   ; GCN-NEXT:   FLAT_STORE_DWORDX2 killed renamable $vgpr58_vgpr59, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
-  ; GCN-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
-  ; GCN-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2 + 4, addrspace 5)
+  ; GCN-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
+  ; GCN-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2 + 4, addrspace 5)
   ; GCN-NEXT:   FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr56_vgpr57, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.2:
@@ -83,20 +83,20 @@ body:             |
   ; GCN-NEXT:   renamable $vgpr0_vgpr1 = V_MOV_B64_PSEUDO 0, implicit $exec
   ; GCN-NEXT:   FLAT_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr43_vgpr44, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
   ; GCN-NEXT:   FLAT_STORE_DWORDX2 killed renamable $vgpr0_vgpr1, killed renamable $vgpr14_vgpr15, 0, 0, implicit $exec, implicit $flat_scr :: (store (s64))
-  ; GCN-NEXT:   $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
-  ; GCN-NEXT:   $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
-  ; GCN-NEXT:   $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
-  ; GCN-NEXT:   $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
-  ; GCN-NEXT:   $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
-  ; GCN-NEXT:   $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
-  ; GCN-NEXT:   $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
-  ; GCN-NEXT:   $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
-  ; GCN-NEXT:   $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
-  ; GCN-NEXT:   $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
-  ; GCN-NEXT:   $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
-  ; GCN-NEXT:   $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+  ; GCN-NEXT:   $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.15, addrspace 5)
+  ; GCN-NEXT:   $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.14, addrspace 5)
+  ; GCN-NEXT:   $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.13, addrspace 5)
+  ; GCN-NEXT:   $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.12, addrspace 5)
+  ; GCN-NEXT:   $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.11, addrspace 5)
+  ; GCN-NEXT:   $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.10, addrspace 5)
+  ; GCN-NEXT:   $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
+  ; GCN-NEXT:   $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.8, addrspace 5)
+  ; GCN-NEXT:   $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.7, addrspace 5)
+  ; GCN-NEXT:   $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.6, addrspace 5)
+  ; GCN-NEXT:   $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.5, addrspace 5)
+  ; GCN-NEXT:   $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
   ; GCN-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; GCN-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+  ; GCN-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
   ; GCN-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
   ; GCN-NEXT:   S_SETPC_B64_return undef $sgpr30_sgpr31
   bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir b/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir
index aa7744f44965c..eacb9085e0a7a 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir
+++ b/llvm/test/CodeGen/AMDGPU/branch-relax-indirect-branch.mir
@@ -35,7 +35,7 @@ body:             |
   ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
   ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1
   ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1
   ; CHECK-NEXT:   $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
@@ -61,7 +61,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
   ; CHECK-NEXT:   $sgpr1 = V_READLANE_B32 killed $vgpr2, 1
-  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
@@ -77,7 +77,7 @@ body:             |
   ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
   ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1
   ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1
   ; CHECK-NEXT:   $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
@@ -103,7 +103,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
   ; CHECK-NEXT:   $sgpr1 = V_READLANE_B32 killed $vgpr2, 1
-  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4:
diff --git a/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir b/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir
index fcf89426c4075..7aa9b4717fd60 100644
--- a/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir
+++ b/llvm/test/CodeGen/AMDGPU/branch-relax-no-terminators.mir
@@ -36,7 +36,7 @@ body:             |
   ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr32, $sgpr33, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
   ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1
   ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1
   ; CHECK-NEXT:   $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
@@ -62,7 +62,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
   ; CHECK-NEXT:   $sgpr1 = V_READLANE_B32 killed $vgpr2, 1
-  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.2:
@@ -78,7 +78,7 @@ body:             |
   ; CHECK-NEXT:   liveins: $vcc_hi, $vcc_lo, $sgpr5, $sgpr6, $sgpr7, $sgpr8, $sgpr9, $sgpr10, $sgpr11, $sgpr12, $sgpr13, $sgpr14, $sgpr15, $sgpr16, $sgpr17, $sgpr18, $sgpr19, $sgpr20, $sgpr21, $sgpr22, $sgpr23, $sgpr24, $sgpr25, $sgpr26, $sgpr27, $sgpr28, $sgpr29, $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $sgpr38, $sgpr39, $sgpr40, $sgpr41, $sgpr42, $sgpr43, $sgpr44, $sgpr45, $sgpr46, $sgpr47, $sgpr48, $sgpr49, $sgpr50, $sgpr51, $sgpr52, $sgpr53, $sgpr54, $sgpr55, $sgpr56, $sgpr57, $sgpr58, $sgpr59, $sgpr60, $sgpr61, $sgpr62, $sgpr63, $sgpr64, $sgpr65, $sgpr66, $sgpr67, $sgpr68, $sgpr69, $sgpr70, $sgpr71, $sgpr72, $sgpr73, $sgpr74, $sgpr75, $sgpr76, $sgpr77, $sgpr78, $sgpr79, $sgpr80, $sgpr81, $sgpr82, $sgpr83, $sgpr84, $sgpr85, $sgpr86, $sgpr87, $sgpr88, $sgpr89, $sgpr90, $sgpr91, $sgpr92, $sgpr93, $sgpr94, $sgpr95, $sgpr96, $sgpr97, $sgpr98, $sgpr99, $sgpr100, $sgpr101, $vgpr0, $vgpr1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit-def $vgpr2
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
   ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr0, 0, undef $vgpr2, implicit $sgpr0_sgpr1
   ; CHECK-NEXT:   $vgpr2 = V_WRITELANE_B32 $sgpr1, 1, $vgpr2, implicit $sgpr0_sgpr1
   ; CHECK-NEXT:   $sgpr0_sgpr1 = S_GETPC_B64 post-instr-symbol <mcsymbol >
@@ -105,7 +105,7 @@ body:             |
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr0 = V_READLANE_B32 $vgpr2, 0, implicit-def $sgpr0_sgpr1
   ; CHECK-NEXT:   $sgpr1 = V_READLANE_B32 killed $vgpr2, 1
-  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
   ; CHECK-NEXT:   $exec = S_NOT_B64 $exec, implicit-def dead $scc, implicit killed $vgpr2
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.4:
diff --git a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir
index 229f614cc56d1..0b043da201275 100644
--- a/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/bug-undef-spilled-agpr.mir
@@ -21,9 +21,9 @@ body:             |
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT:   $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
   ; GCN-NEXT:   $vgpr63 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GCN-NEXT:   $exec = S_MOV_B64 -1
-  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+  ; GCN-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
   ; GCN-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
   ; GCN-NEXT:   renamable $vgpr62 = IMPLICIT_DEF
   ; GCN-NEXT:   $vgpr62 = SI_SPILL_S32_TO_VGPR $sgpr15, 0, killed $vgpr62
@@ -59,10 +59,10 @@ body:             |
   ; GCN-NEXT: {{  $}}
   ; GCN-NEXT: bb.4:
   ; GCN-NEXT:   $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; GCN-NEXT:   $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GCN-NEXT:   $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GCN-NEXT:   $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
   ; GCN-NEXT:   $exec = S_MOV_B64 -1
-  ; GCN-NEXT:   $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+  ; GCN-NEXT:   $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
   ; GCN-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
   ; GCN-NEXT:   SI_RETURN
   bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
index 17be1db7db814..6550fa61c77ab 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-s-mov-b32.mir
@@ -300,22 +300,22 @@ body:   |
     ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
     ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.5, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.6, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.7, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.8, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.10, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.11, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.12, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.13, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.14, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.15, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.16, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.17, addrspace 5)
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -326,12 +326,12 @@ body:   |
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX8-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
     ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.18, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.18, addrspace 5)
     ; GFX8-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX8-NEXT: $sgpr4 = S_MOV_B32 128
     ; GFX8-NEXT: $vgpr1, dead $sgpr6_sgpr7 = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr1, 0, implicit $exec
     ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
-    ; GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.18, addrspace 5)
+    ; GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.18, addrspace 5)
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -340,43 +340,43 @@ body:   |
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
-    ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
-    ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
-    ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
-    ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
-    ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
-    ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
-    ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
-    ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
-    ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
-    ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
-    ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
-    ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
-    ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
-    ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
-    ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.17, addrspace 5)
+    ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.16, addrspace 5)
+    ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.15, addrspace 5)
+    ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.14, addrspace 5)
+    ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.13, addrspace 5)
+    ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.12, addrspace 5)
+    ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.11, addrspace 5)
+    ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.10, addrspace 5)
+    ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
+    ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.8, addrspace 5)
+    ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.7, addrspace 5)
+    ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.6, addrspace 5)
+    ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.5, addrspace 5)
+    ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
+    ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
+    ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
     ;
     ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
     ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX900-NEXT: {{  $}}
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.5, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.6, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.7, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.8, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.10, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.11, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.12, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.13, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.14, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.15, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.16, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.17, addrspace 5)
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -387,11 +387,11 @@ body:   |
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX900-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
     ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.18, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.18, addrspace 5)
     ; GFX900-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX900-NEXT: $vgpr1 = V_ADD_U32_e32 128, killed $vgpr1, implicit $exec
     ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
-    ; GFX900-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.18, addrspace 5)
+    ; GFX900-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.18, addrspace 5)
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -400,22 +400,22 @@ body:   |
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
-    ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
-    ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
-    ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
-    ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
-    ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
-    ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
-    ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
-    ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
-    ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
-    ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
-    ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
-    ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
-    ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
-    ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
-    ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.17, addrspace 5)
+    ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.16, addrspace 5)
+    ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.15, addrspace 5)
+    ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.14, addrspace 5)
+    ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.13, addrspace 5)
+    ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.12, addrspace 5)
+    ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.11, addrspace 5)
+    ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.10, addrspace 5)
+    ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
+    ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.8, addrspace 5)
+    ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.7, addrspace 5)
+    ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.6, addrspace 5)
+    ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.5, addrspace 5)
+    ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
+    ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
+    ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
     ;
     ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
@@ -447,11 +447,11 @@ body:   |
     ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX90A-NEXT: V_CMP_EQ_U32_e32 0, killed $vgpr0, implicit-def $vcc, implicit $exec
     ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.18, addrspace 5)
+    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.18, addrspace 5)
     ; GFX90A-NEXT: $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX90A-NEXT: $vgpr1 = V_ADD_U32_e32 64, killed $vgpr1, implicit $exec
     ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
-    ; GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.18, addrspace 5)
+    ; GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.18, addrspace 5)
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -481,22 +481,22 @@ body:   |
     ; GFX1010-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
     ; GFX1010: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX1010-NEXT: {{  $}}
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
-    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.5, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.6, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.7, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.8, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.10, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.11, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.12, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.13, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.14, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.15, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.16, addrspace 5)
+    ; GFX1010-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.17, addrspace 5)
     ; GFX1010-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX1010-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX1010-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -518,43 +518,43 @@ body:   |
     ; GFX1010-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX1010-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX1010-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX1010-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
-    ; GFX1010-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
-    ; GFX1010-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
-    ; GFX1010-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
-    ; GFX1010-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
-    ; GFX1010-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
-    ; GFX1010-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
-    ; GFX1010-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
-    ; GFX1010-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
-    ; GFX1010-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
-    ; GFX1010-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
-    ; GFX1010-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
-    ; GFX1010-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
-    ; GFX1010-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
-    ; GFX1010-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
-    ; GFX1010-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GFX1010-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.17, addrspace 5)
+    ; GFX1010-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.16, addrspace 5)
+    ; GFX1010-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.15, addrspace 5)
+    ; GFX1010-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.14, addrspace 5)
+    ; GFX1010-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.13, addrspace 5)
+    ; GFX1010-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.12, addrspace 5)
+    ; GFX1010-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.11, addrspace 5)
+    ; GFX1010-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.10, addrspace 5)
+    ; GFX1010-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
+    ; GFX1010-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.8, addrspace 5)
+    ; GFX1010-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.7, addrspace 5)
+    ; GFX1010-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.6, addrspace 5)
+    ; GFX1010-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.5, addrspace 5)
+    ; GFX1010-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
+    ; GFX1010-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
+    ; GFX1010-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; GFX1010-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc_lo
     ;
     ; GFX1100-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
     ; GFX1100: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX1100-NEXT: {{  $}}
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr43, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr45, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr46, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr47, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr56, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr57, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr58, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr59, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr60, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr61, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr62, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
-    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr63, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr43, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.5, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.6, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr45, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.7, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr46, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.8, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr47, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr56, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.10, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr57, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.11, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr58, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.12, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr59, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.13, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr60, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.14, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr61, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.15, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr62, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.16, addrspace 5)
+    ; GFX1100-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr63, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.17, addrspace 5)
     ; GFX1100-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX1100-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX1100-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -577,43 +577,43 @@ body:   |
     ; GFX1100-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX1100-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX1100-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX1100-NEXT: $vgpr63 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
-    ; GFX1100-NEXT: $vgpr62 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
-    ; GFX1100-NEXT: $vgpr61 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
-    ; GFX1100-NEXT: $vgpr60 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
-    ; GFX1100-NEXT: $vgpr59 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
-    ; GFX1100-NEXT: $vgpr58 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
-    ; GFX1100-NEXT: $vgpr57 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
-    ; GFX1100-NEXT: $vgpr56 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
-    ; GFX1100-NEXT: $vgpr47 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
-    ; GFX1100-NEXT: $vgpr46 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
-    ; GFX1100-NEXT: $vgpr45 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
-    ; GFX1100-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
-    ; GFX1100-NEXT: $vgpr43 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
-    ; GFX1100-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
-    ; GFX1100-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
-    ; GFX1100-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GFX1100-NEXT: $vgpr63 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.17, addrspace 5)
+    ; GFX1100-NEXT: $vgpr62 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.16, addrspace 5)
+    ; GFX1100-NEXT: $vgpr61 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.15, addrspace 5)
+    ; GFX1100-NEXT: $vgpr60 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.14, addrspace 5)
+    ; GFX1100-NEXT: $vgpr59 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.13, addrspace 5)
+    ; GFX1100-NEXT: $vgpr58 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.12, addrspace 5)
+    ; GFX1100-NEXT: $vgpr57 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.11, addrspace 5)
+    ; GFX1100-NEXT: $vgpr56 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.10, addrspace 5)
+    ; GFX1100-NEXT: $vgpr47 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
+    ; GFX1100-NEXT: $vgpr46 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.8, addrspace 5)
+    ; GFX1100-NEXT: $vgpr45 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.7, addrspace 5)
+    ; GFX1100-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.6, addrspace 5)
+    ; GFX1100-NEXT: $vgpr43 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.5, addrspace 5)
+    ; GFX1100-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
+    ; GFX1100-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
+    ; GFX1100-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; GFX1100-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc_lo
     ;
     ; GFX1200-LABEL: name: materialize_fi_s_mov_b32_offset_64_live_scc_live_vcc_no_vgpr
     ; GFX1200: liveins: $sgpr4, $sgpr5, $vgpr0, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX1200-NEXT: {{  $}}
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr43, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr45, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr46, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr47, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr56, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr57, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr58, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr59, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr60, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr61, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr62, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
-    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr63, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr40, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr43, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.5, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.6, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr45, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.7, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr46, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.8, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr47, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr56, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.10, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr57, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.11, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr58, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.12, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr59, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.13, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr60, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.14, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr61, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.15, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr62, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.16, addrspace 5)
+    ; GFX1200-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr63, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.17, addrspace 5)
     ; GFX1200-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX1200-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX1200-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -636,22 +636,22 @@ body:   |
     ; GFX1200-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX1200-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX1200-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX1200-NEXT: $vgpr63 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
-    ; GFX1200-NEXT: $vgpr62 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
-    ; GFX1200-NEXT: $vgpr61 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
-    ; GFX1200-NEXT: $vgpr60 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
-    ; GFX1200-NEXT: $vgpr59 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
-    ; GFX1200-NEXT: $vgpr58 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
-    ; GFX1200-NEXT: $vgpr57 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
-    ; GFX1200-NEXT: $vgpr56 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
-    ; GFX1200-NEXT: $vgpr47 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
-    ; GFX1200-NEXT: $vgpr46 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
-    ; GFX1200-NEXT: $vgpr45 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
-    ; GFX1200-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
-    ; GFX1200-NEXT: $vgpr43 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
-    ; GFX1200-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
-    ; GFX1200-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
-    ; GFX1200-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GFX1200-NEXT: $vgpr63 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.17, addrspace 5)
+    ; GFX1200-NEXT: $vgpr62 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.16, addrspace 5)
+    ; GFX1200-NEXT: $vgpr61 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.15, addrspace 5)
+    ; GFX1200-NEXT: $vgpr60 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.14, addrspace 5)
+    ; GFX1200-NEXT: $vgpr59 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.13, addrspace 5)
+    ; GFX1200-NEXT: $vgpr58 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.12, addrspace 5)
+    ; GFX1200-NEXT: $vgpr57 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.11, addrspace 5)
+    ; GFX1200-NEXT: $vgpr56 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.10, addrspace 5)
+    ; GFX1200-NEXT: $vgpr47 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
+    ; GFX1200-NEXT: $vgpr46 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.8, addrspace 5)
+    ; GFX1200-NEXT: $vgpr45 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.7, addrspace 5)
+    ; GFX1200-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.6, addrspace 5)
+    ; GFX1200-NEXT: $vgpr43 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.5, addrspace 5)
+    ; GFX1200-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
+    ; GFX1200-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
+    ; GFX1200-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; GFX1200-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc_lo
   S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
   S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
@@ -1068,4 +1068,3 @@ body:             |
   S_ENDPGM 0, implicit $sgpr4, implicit $scc, implicit killed $vcc
 
 ...
-
diff --git a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
index e7bc77b8676bd..79486d56c55ca 100644
--- a/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
+++ b/llvm/test/CodeGen/AMDGPU/eliminate-frame-index-v-add-co-u32.mir
@@ -1053,71 +1053,71 @@ body:             |
     ; GFX7-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
     ; GFX7: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
     ; GFX7-NEXT: {{  $}}
-    ; GFX7-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GFX7-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
     ; GFX7-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
     ; GFX7-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
     ; GFX7-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
-    ; GFX7-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GFX7-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; GFX7-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; GFX7-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX8-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
     ; GFX8: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, $vgpr255, 
$sgpr8
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
     ; GFX8-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
     ; GFX8-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
     ; GFX8-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
-    ; GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GFX8-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; GFX8-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX900-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
     ; GFX900: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
     ; GFX900-NEXT: {{  $}}
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
     ; GFX900-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
     ; GFX900-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
     ; GFX900-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
-    ; GFX900-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GFX900-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; GFX900-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX90A-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
     ; GFX90A: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
     ; GFX90A-NEXT: {{  $}}
-    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
     ; GFX90A-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
     ; GFX90A-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
     ; GFX90A-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
-    ; GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; GFX90A-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX10-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
     ; GFX10: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
     ; GFX10-NEXT: {{  $}}
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
     ; GFX10-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
     ; GFX10-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 128, killed $vgpr0, 0, implicit $exec
-    ; GFX10-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GFX10-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; GFX10-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; GFX10-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; GFX942-LABEL: name: v_add_co_u32_e64__fi_literal_offset__sgpr__scavenge_spill_required
     ; GFX942: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
     ; GFX942-NEXT: {{  $}}
-    ; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GFX942-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
     ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
     ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 $sgpr8, killed $vgpr1, 0, implicit $exec
     ; GFX942-NEXT: $vgpr1 = V_MOV_B32_e32 128, implicit $exec
     ; GFX942-NEXT: renamable $vgpr0, renamable $vcc = V_ADD_CO_U32_e64 killed $vgpr1, killed $vgpr0, 0, implicit $exec
-    ; GFX942-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GFX942-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; GFX942-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; GFX942-NEXT: SI_RETURN implicit $vgpr0
     ;
@@ -1160,22 +1160,22 @@ body:             |
     ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required
     ; MUBUFW64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
     ; MUBUFW64-NEXT: {{  $}}
-    ; MUBUFW64-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; MUBUFW64-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
     ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
     ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec
-    ; MUBUFW64-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; MUBUFW64-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; MUBUFW64-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
     ;
     ; FLATSCRW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__sgpr__scavenge_spill_required
     ; FLATSCRW64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
     ; FLATSCRW64-NEXT: {{  $}}
-    ; FLATSCRW64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; FLATSCRW64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
     ; FLATSCRW64-NEXT: $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
     ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
     ; FLATSCRW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec
-    ; FLATSCRW64-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; FLATSCRW64-NEXT: $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 132, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; FLATSCRW64-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; FLATSCRW64-NEXT: SI_RETURN implicit $vgpr0
     renamable $vgpr0 = V_ADD_CO_U32_e32 $sgpr8, %stack.1, implicit-def dead $vcc, implicit $exec
@@ -1202,11 +1202,11 @@ body:             |
     ; MUBUFW64-LABEL: name: v_add_co_u32_e32__fi_literal_offset__vgpr__scavenge_spill_required
     ; MUBUFW64: liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253, $vgpr254, 
$vgpr255, $sgpr8
     ; MUBUFW64-NEXT: {{  $}}
-    ; MUBUFW64-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; MUBUFW64-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
     ; MUBUFW64-NEXT: renamable $vgpr1 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 $vgpr8, killed $vgpr1, implicit-def dead $vcc, implicit $exec
     ; MUBUFW64-NEXT: renamable $vgpr0 = V_ADD_CO_U32_e32 128, killed $vgpr0, implicit-def dead $vcc, implicit $exec
-    ; MUBUFW64-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; MUBUFW64-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; MUBUFW64-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, implicit $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, implicit $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, implicit $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, implicit $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, implicit $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, implicit $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, implicit $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, implicit $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, implicit $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, implicit $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, implicit 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, implicit $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, implicit $vgpr248_vgpr249_vgpr250_vgpr251, implicit $vgpr252_vgpr253, implicit $vgpr254, implicit $vgpr255
     ; MUBUFW64-NEXT: SI_RETURN implicit $vgpr0
     ;
diff --git a/llvm/test/CodeGen/AMDGPU/frame-index.mir b/llvm/test/CodeGen/AMDGPU/frame-index.mir
index 56d65e0f91662..ab544e5336754 100644
--- a/llvm/test/CodeGen/AMDGPU/frame-index.mir
+++ b/llvm/test/CodeGen/AMDGPU/frame-index.mir
@@ -401,22 +401,22 @@ body:             |
     ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
     ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.5, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.6, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.7, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.8, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.10, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.11, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.12, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.13, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.14, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.15, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.16, addrspace 5)
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -426,12 +426,12 @@ body:             |
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.17, addrspace 5)
     ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX8-NEXT: $sgpr4 = S_MOV_B32 64
     ; GFX8-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
     ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
+    ; GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.17, addrspace 5)
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -440,43 +440,43 @@ body:             |
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
-    ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
-    ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
-    ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
-    ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
-    ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
-    ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
-    ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
-    ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
-    ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
-    ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
-    ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
-    ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
-    ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
-    ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
-    ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.16, addrspace 5)
+    ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.15, addrspace 5)
+    ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.14, addrspace 5)
+    ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.13, addrspace 5)
+    ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.12, addrspace 5)
+    ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.11, addrspace 5)
+    ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.10, addrspace 5)
+    ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
+    ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.8, addrspace 5)
+    ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.7, addrspace 5)
+    ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.6, addrspace 5)
+    ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.5, addrspace 5)
+    ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
+    ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
+    ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
+    ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
     ;
     ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
     ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX900-NEXT: {{  $}}
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.5, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.6, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.7, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.8, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.10, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.11, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.12, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.13, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.14, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.15, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.16, addrspace 5)
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -486,11 +486,11 @@ body:             |
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.17, addrspace 5)
     ; GFX900-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
     ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GFX900-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
+    ; GFX900-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.17, addrspace 5)
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -499,22 +499,22 @@ body:             |
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
-    ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
-    ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
-    ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
-    ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
-    ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
-    ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
-    ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
-    ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
-    ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
-    ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
-    ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
-    ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
-    ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
-    ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
-    ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.16, addrspace 5)
+    ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.15, addrspace 5)
+    ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.14, addrspace 5)
+    ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.13, addrspace 5)
+    ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.12, addrspace 5)
+    ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.11, addrspace 5)
+    ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.10, addrspace 5)
+    ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
+    ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.8, addrspace 5)
+    ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.7, addrspace 5)
+    ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.6, addrspace 5)
+    ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.5, addrspace 5)
+    ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
+    ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
+    ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
+    ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
     ;
     ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_0_live_scc__no_free_vgprs
@@ -545,10 +545,10 @@ body:             |
     ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
+    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.17, addrspace 5)
     ; GFX90A-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
+    ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.17, addrspace 5)
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -615,22 +615,22 @@ body:             |
     ; GFX8-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
     ; GFX8: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX8-NEXT: {{  $}}
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.5, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.6, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.7, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.8, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.10, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.11, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.12, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.13, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.14, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.15, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.16, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.17, addrspace 5)
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -640,12 +640,12 @@ body:             |
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX8-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX8-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.18, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.18, addrspace 5)
     ; GFX8-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX8-NEXT: $sgpr4 = S_MOV_B32 128
     ; GFX8-NEXT: $vgpr0, dead $vcc = V_ADD_CO_U32_e64 killed $sgpr4, killed $vgpr0, 0, implicit $exec
     ; GFX8-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.18, addrspace 5)
+    ; GFX8-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.18, addrspace 5)
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -654,43 +654,43 @@ body:             |
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX8-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
-    ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
-    ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
-    ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
-    ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
-    ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
-    ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
-    ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
-    ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
-    ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
-    ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
-    ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
-    ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
-    ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
-    ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
-    ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GFX8-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.17, addrspace 5)
+    ; GFX8-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.16, addrspace 5)
+    ; GFX8-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.15, addrspace 5)
+    ; GFX8-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.14, addrspace 5)
+    ; GFX8-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.13, addrspace 5)
+    ; GFX8-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.12, addrspace 5)
+    ; GFX8-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.11, addrspace 5)
+    ; GFX8-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.10, addrspace 5)
+    ; GFX8-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
+    ; GFX8-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.8, addrspace 5)
+    ; GFX8-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.7, addrspace 5)
+    ; GFX8-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.6, addrspace 5)
+    ; GFX8-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.5, addrspace 5)
+    ; GFX8-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
+    ; GFX8-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
+    ; GFX8-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; GFX8-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
     ;
     ; GFX900-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
     ; GFX900: liveins: $sgpr4, $sgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr43, $vgpr44, $vgpr45, $vgpr46, $vgpr47, $vgpr56, $vgpr57, $vgpr58, $vgpr59, $vgpr60, $vgpr61, $vgpr62, $vgpr63
     ; GFX900-NEXT: {{  $}}
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr41, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr42, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr43, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.5, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr44, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.6, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr45, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.7, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr46, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.8, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr47, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr56, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.10, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr57, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.11, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr58, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.12, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr59, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.13, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr60, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.14, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr61, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.15, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr62, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.16, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.17, addrspace 5)
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -700,11 +700,11 @@ body:             |
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX900-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX900-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.18, addrspace 5)
+    ; GFX900-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.18, addrspace 5)
     ; GFX900-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX900-NEXT: $vgpr0 = V_ADD_U32_e32 128, killed $vgpr0, implicit $exec
     ; GFX900-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GFX900-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.18, addrspace 5)
+    ; GFX900-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.18, addrspace 5)
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
@@ -713,22 +713,22 @@ body:             |
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX900-NEXT: S_NOP 0, implicit $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
-    ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
-    ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
-    ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
-    ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
-    ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
-    ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
-    ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
-    ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
-    ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
-    ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
-    ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
-    ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
-    ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
-    ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
-    ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
-    ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GFX900-NEXT: $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.17, addrspace 5)
+    ; GFX900-NEXT: $vgpr62 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.16, addrspace 5)
+    ; GFX900-NEXT: $vgpr61 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.15, addrspace 5)
+    ; GFX900-NEXT: $vgpr60 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.14, addrspace 5)
+    ; GFX900-NEXT: $vgpr59 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.13, addrspace 5)
+    ; GFX900-NEXT: $vgpr58 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.12, addrspace 5)
+    ; GFX900-NEXT: $vgpr57 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.11, addrspace 5)
+    ; GFX900-NEXT: $vgpr56 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.10, addrspace 5)
+    ; GFX900-NEXT: $vgpr47 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
+    ; GFX900-NEXT: $vgpr46 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.8, addrspace 5)
+    ; GFX900-NEXT: $vgpr45 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.7, addrspace 5)
+    ; GFX900-NEXT: $vgpr44 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.6, addrspace 5)
+    ; GFX900-NEXT: $vgpr43 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.5, addrspace 5)
+    ; GFX900-NEXT: $vgpr42 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
+    ; GFX900-NEXT: $vgpr41 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
+    ; GFX900-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; GFX900-NEXT: S_ENDPGM 0, implicit $sgpr4, implicit $scc
     ;
     ; GFX90A-LABEL: name: materialize_fi_s_mov_b32_offset_96_live_scc__no_free_vgprs
@@ -759,11 +759,11 @@ body:             |
     ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55
     ; GFX90A-NEXT: S_NOP 0, implicit-def $vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
     ; GFX90A-NEXT: S_CMP_EQ_I32 $sgpr4, $sgpr5, implicit-def $scc
-    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.18, addrspace 5)
+    ; GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.18, addrspace 5)
     ; GFX90A-NEXT: $vgpr0 = V_LSHRREV_B32_e64 6, $sgpr32, implicit $exec
     ; GFX90A-NEXT: $vgpr0 = V_ADD_U32_e32 64, killed $vgpr0, implicit $exec
     ; GFX90A-NEXT: $sgpr4 = V_READFIRSTLANE_B32 $vgpr0, implicit $exec
-    ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.18, addrspace 5)
+    ; GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.18, addrspace 5)
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; GFX90A-NEXT: S_NOP 0, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23
diff --git a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
index 606609a5c6ae0..9eaebfdf611e0 100644
--- a/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
+++ b/llvm/test/CodeGen/AMDGPU/insert-waitcnts-crash.ll
@@ -13,7 +13,7 @@ define fastcc i32 @foo() {
   ; CHECK-NEXT:   $sgpr16 = S_MOV_B32 $sgpr33
   ; CHECK-NEXT:   $sgpr33 = S_MOV_B32 $sgpr32
   ; CHECK-NEXT:   $sgpr17 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; CHECK-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr17
   ; CHECK-NEXT:   $sgpr32 = frame-setup S_ADDK_I32 $sgpr32, 512, implicit-def dead $scc
   ; CHECK-NEXT:   $vgpr40 = V_WRITELANE_B32 killed $sgpr16, 2, undef $vgpr40
@@ -44,7 +44,7 @@ define fastcc i32 @foo() {
   ; CHECK-NEXT:   $sgpr32 = S_MOV_B32 $sgpr33
   ; CHECK-NEXT:   $sgpr4 = V_READLANE_B32 $vgpr40, 2
   ; CHECK-NEXT:   $sgpr5 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; CHECK-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; CHECK-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr5
   ; CHECK-NEXT:   $sgpr33 = S_MOV_B32 killed $sgpr4
   ; CHECK-NEXT:   S_WAITCNT 16240
diff --git a/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir
index 8a3ed8be536ba..1c93dee780312 100644
--- a/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir
+++ b/llvm/test/CodeGen/AMDGPU/issue98474-virtregrewriter-live-out-undef-subregisters.mir
@@ -271,7 +271,7 @@ body:             |
   ; CHECK-NEXT:   liveins: $sgpr30, $sgpr31, $sgpr34, $sgpr35, $sgpr36, $sgpr37, $vgpr0, $vgpr1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
   ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
   ; CHECK-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr30, 0, $vgpr40
   ; CHECK-NEXT:   $vgpr40 = SI_SPILL_S32_TO_VGPR $sgpr31, 1, $vgpr40
@@ -307,7 +307,7 @@ body:             |
   ; CHECK-NEXT:   $sgpr31 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 1
   ; CHECK-NEXT:   $sgpr30 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 0
   ; CHECK-NEXT:   $sgpr4_sgpr5 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
   ; CHECK-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
   ; CHECK-NEXT:   SI_RETURN implicit $vgpr0
   bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir b/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir
index 2337c9ebf7689..819da917db9fb 100644
--- a/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir
+++ b/llvm/test/CodeGen/AMDGPU/kernel-mubuf-with-voffset.mir
@@ -36,7 +36,7 @@ body:             |
   ; CHECK-NEXT:   $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; CHECK-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; CHECK-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
   ; CHECK-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
index dccfe7e2d1656..33e8fb75431c2 100644
--- a/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
+++ b/llvm/test/CodeGen/AMDGPU/partial-regcopy-and-spill-missed-at-regalloc.ll
@@ -40,7 +40,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
   ; PEI-GFX908-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 7012362 /* regdef:VReg_128 */, def renamable $vgpr0_vgpr1_vgpr2_vgpr3
   ; PEI-GFX908-NEXT:   renamable $agpr0_agpr1_agpr2_agpr3 = COPY killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, implicit $exec
   ; PEI-GFX908-NEXT:   INLINEASM &"; def $0", 1 /* sideeffect attdialect */, 2818058 /* regdef:VReg_64 */, def renamable $vgpr0_vgpr1
-  ; PEI-GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+  ; PEI-GFX908-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
   ; PEI-GFX908-NEXT:   $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1
   ; PEI-GFX908-NEXT:   renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec
   ; PEI-GFX908-NEXT:   GLOBAL_STORE_DWORDX4 undef renamable $vgpr0_vgpr1, killed renamable $vgpr0_vgpr1_vgpr2_vgpr3, 0, 0, implicit $exec :: (volatile store (s128) into `ptr addrspace(1) poison`, addrspace 1)
@@ -49,7 +49,7 @@ define amdgpu_kernel void @partial_copy(<4 x i32> %arg) #0 {
   ; PEI-GFX908-NEXT:   renamable $vgpr0 = AV_MOV_B32_IMM_PSEUDO 1, implicit $exec
   ; PEI-GFX908-NEXT:   renamable $vgpr1 = AV_MOV_B32_IMM_PSEUDO 2, implicit $exec
   ; PEI-GFX908-NEXT:   renamable $agpr0_agpr1_agpr2_agpr3 = V_MFMA_I32_4X4X4I8_e64 killed $vgpr0, killed $vgpr1, killed $agpr0_agpr1_agpr2_agpr3, 0, 0, 0, implicit $mode, implicit $exec
-  ; PEI-GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+  ; PEI-GFX908-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, 0, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
   ; PEI-GFX908-NEXT:   $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1
   ; PEI-GFX908-NEXT:   GLOBAL_STORE_DWORDX2 undef renamable $vgpr0_vgpr1, killed renamable $vgpr0_vgpr1, 0, 0, implicit $exec :: (volatile store (s64) into `ptr addrspace(1) poison`, addrspace 1)
   ; PEI-GFX908-NEXT:   renamable $vgpr0_vgpr1_vgpr2_vgpr3 = COPY killed renamable $agpr0_agpr1_agpr2_agpr3, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir
index e5d28438ddc8a..6121b447228b8 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain-preserve.mir
@@ -36,12 +36,12 @@ body:             |
     ; GCN-LABEL: name: preserve_active_lanes_above_args
     ; GCN: liveins: $sgpr0, $vgpr8, $vgpr9, $vgpr10
     ; GCN-NEXT: {{  $}}
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST killed $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST killed $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec
     ; GCN-NEXT: $vgpr8 = COPY killed renamable $vgpr10
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
-    ; GCN-NEXT: $vgpr10 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr10 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
     renamable $vgpr10 = V_MOV_B32_e32 10, implicit $exec
     $vgpr8 = COPY renamable killed $vgpr10
@@ -70,7 +70,7 @@ body:             |
     ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr10, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
     ; GCN-NEXT: $vgpr10 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr10
@@ -81,7 +81,7 @@ body:             |
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
     ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr10 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr10 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
     ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
     S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
@@ -144,7 +144,7 @@ body:             |
     ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9, $vgpr10
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
     ; GCN-NEXT: $vgpr8 = SI_SPILL_S32_TO_VGPR $sgpr35, 0, killed $vgpr8
@@ -154,7 +154,7 @@ body:             |
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
     ; GCN-NEXT: $sgpr1 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr9(tied-def 0) :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr9(tied-def 0) :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
     ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8, implicit $vgpr9
     S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
diff --git a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir
index f5f640577e7ca..69d857e11ac9b 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-amdgpu-cs-chain.mir
@@ -38,15 +38,15 @@ body:             |
     ; GCN: liveins: $sgpr0, $sgpr35, $vgpr8, $vgpr9
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr8, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_ST $vgpr9, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
     ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
     ; GCN-NEXT: renamable $sgpr4_sgpr5 = S_LOAD_DWORDX2_IMM killed renamable $sgpr4_sgpr5, 0, 0 :: (dereferenceable invariant load (p0) from got, addrspace 4)
     ; GCN-NEXT: S_NOP 0, implicit-def $vgpr0, implicit-def $vgpr1, implicit-def $vgpr2, implicit-def $vgpr3, implicit-def $vgpr4, implicit-def $vgpr5, implicit-def $vgpr6, implicit-def $vgpr7
     ; GCN-NEXT: $sgpr1 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_ST 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: $vgpr8 = SCRATCH_LOAD_DWORD_ST 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr8(tied-def 0) :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr9 = SCRATCH_LOAD_DWORD_ST 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GCN-NEXT: $exec_lo = S_MOV_B32 killed $sgpr1
     ; GCN-NEXT: SI_CS_CHAIN_TC_W32 killed renamable $sgpr4_sgpr5, @callee, 0, -1, amdgpu_allvgprs, implicit $sgpr0, implicit $vgpr8
     renamable $sgpr4_sgpr5 = SI_PC_ADD_REL_OFFSET target-flags(amdgpu-gotprel32-lo) @callee + 4, target-flags(amdgpu-gotprel32-hi) @callee + 12, implicit-def dead $scc
diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir
index 71d3c77bfe203..f35a34659e6dd 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-build-av-spill.mir
@@ -21,8 +21,8 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v1
     ; MUBUF: $vgpr0 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v1
@@ -35,8 +35,8 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v1
     ; FLATSCR: $vgpr0 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v1
@@ -49,8 +49,8 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v1
     ; MUBUF-GFX90A: $vgpr0 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v1
@@ -63,8 +63,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v1
     ; FLATSCR-GFX90A: $vgpr0 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v1
@@ -94,10 +94,10 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v2
     ; MUBUF: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v2
@@ -112,8 +112,8 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v2
     ; FLATSCR: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v2
@@ -128,10 +128,10 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v2
     ; MUBUF-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v2
@@ -146,8 +146,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v2
     ; FLATSCR-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v2
@@ -179,12 +179,12 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v3
     ; MUBUF: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v3
@@ -201,8 +201,8 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v3
     ; FLATSCR: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s96) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s96) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v3
@@ -219,12 +219,12 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v3
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v3
@@ -241,8 +241,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v3
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s96) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s96) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v3
@@ -276,14 +276,14 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v4
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v4
@@ -302,8 +302,8 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v4
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v4
@@ -322,14 +322,14 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v4
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v4
@@ -348,8 +348,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v4
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v4
@@ -385,16 +385,16 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v5
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v5
@@ -415,10 +415,10 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v5
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v5
@@ -439,16 +439,16 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v5
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v5
@@ -469,10 +469,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v5
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v5
@@ -510,18 +510,18 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v6
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v6
@@ -544,10 +544,10 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v6
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s64) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s64) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v6
@@ -570,18 +570,18 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v6
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v6
@@ -604,10 +604,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v6
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s64) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s64) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v6
@@ -647,20 +647,20 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v7
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v7
@@ -685,10 +685,10 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v7
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr4_vgpr5_vgpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" store (s96) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr4_vgpr5_vgpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-thread-private" store (s96) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s96) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v7
@@ -713,20 +713,20 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v7
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v7
@@ -751,10 +751,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v7
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr4_vgpr5_vgpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" store (s96) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr4_vgpr5_vgpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-thread-private" store (s96) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s96) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v7
@@ -796,22 +796,22 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v8
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v8
@@ -838,10 +838,10 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v8
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v8
@@ -868,22 +868,22 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v8
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v8
@@ -910,10 +910,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v8
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v8
@@ -957,38 +957,38 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v16
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v16
@@ -1031,14 +1031,14 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v16
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 48, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v16
@@ -1081,38 +1081,38 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v16
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v16
@@ -1155,14 +1155,14 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v16
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 48, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v16
@@ -1222,70 +1222,70 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_av_v32
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
-    ; MUBUF-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
-    ; MUBUF-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
-    ; MUBUF-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
-    ; MUBUF-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
-    ; MUBUF-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
-    ; MUBUF-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
-    ; MUBUF-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
-    ; MUBUF-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
-    ; MUBUF-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
-    ; MUBUF-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
-    ; MUBUF-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
-    ; MUBUF-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
-    ; MUBUF-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
-    ; MUBUF-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
-    ; MUBUF-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
-    ; MUBUF-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 64, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 68, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 72, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 76, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 80, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 84, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 88, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 92, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 96, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 100, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 104, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 108, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 112, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 116, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 120, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 124, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 64, addrspace 5)
+    ; MUBUF-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 68, addrspace 5)
+    ; MUBUF-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 72, addrspace 5)
+    ; MUBUF-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 76, addrspace 5)
+    ; MUBUF-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 80, addrspace 5)
+    ; MUBUF-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 84, addrspace 5)
+    ; MUBUF-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 88, addrspace 5)
+    ; MUBUF-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 92, addrspace 5)
+    ; MUBUF-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 96, addrspace 5)
+    ; MUBUF-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 100, addrspace 5)
+    ; MUBUF-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 104, addrspace 5)
+    ; MUBUF-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 108, addrspace 5)
+    ; MUBUF-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 112, addrspace 5)
+    ; MUBUF-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 116, addrspace 5)
+    ; MUBUF-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 120, addrspace 5)
+    ; MUBUF-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 124, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_av_v32
@@ -1360,22 +1360,22 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_av_v32
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 112, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 112, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_av_v32
@@ -1450,70 +1450,70 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_v32
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 64, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 68, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 72, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 76, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 80, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 84, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 88, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 92, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 96, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 100, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 104, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 108, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 112, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 116, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 120, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 124, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 64, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 68, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 72, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 76, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 80, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 84, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 88, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 92, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 96, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 100, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 104, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 108, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 112, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 116, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 120, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 124, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_v32
@@ -1588,22 +1588,22 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_v32
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 112, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 112, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_v32
@@ -1696,8 +1696,8 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a1
     ; MUBUF: $agpr0 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -1712,8 +1712,8 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a1
     ; FLATSCR: $agpr0 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -1727,8 +1727,8 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a1
     ; MUBUF-GFX90A: $agpr0 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a1
@@ -1741,8 +1741,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a1
     ; FLATSCR-GFX90A: $agpr0 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a1
@@ -1773,12 +1773,12 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a2
     ; MUBUF: $agpr0_agpr1 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -1795,12 +1795,12 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a2
     ; FLATSCR: $agpr0_agpr1 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -1816,10 +1816,10 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a2
     ; MUBUF-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a2
@@ -1834,8 +1834,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a2
     ; FLATSCR-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a2
@@ -1868,16 +1868,16 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a3
     ; MUBUF: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -1896,16 +1896,16 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a3
     ; FLATSCR: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -1923,12 +1923,12 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a3
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a3
@@ -1945,8 +1945,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a3
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s96) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s96) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a3
@@ -1981,20 +1981,20 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a4
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2015,20 +2015,20 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a4
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2048,14 +2048,14 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a4
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a4
@@ -2074,8 +2074,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a4
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a4
@@ -2112,24 +2112,24 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a5
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2152,24 +2152,24 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a5
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2191,16 +2191,16 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a5
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a5
@@ -2221,10 +2221,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a5
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a5
@@ -2263,28 +2263,28 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a6
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2309,28 +2309,28 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a6
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2354,18 +2354,18 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a6
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a6
@@ -2388,10 +2388,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a6
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr4_agpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s64) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr4_agpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" store (s64) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a6
@@ -2432,32 +2432,32 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a7
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2484,32 +2484,32 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a7
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2535,20 +2535,20 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a7
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a7
@@ -2573,10 +2573,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a7
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr4_agpr5_agpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" store (s96) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr4_agpr5_agpr6, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-thread-private" store (s96) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s96) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a7
@@ -2619,36 +2619,36 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a8
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2677,36 +2677,36 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a8
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2734,22 +2734,22 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a8
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a8
@@ -2776,10 +2776,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a8
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a8
@@ -2824,40 +2824,40 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a9
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2888,40 +2888,40 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a9
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2951,24 +2951,24 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a9
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a9
@@ -2997,12 +2997,12 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a9
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr8, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr8, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr8 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a9
@@ -3049,44 +3049,44 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a10
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -3119,44 +3119,44 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a10
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -3188,26 +3188,26 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a10
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a10
@@ -3238,12 +3238,12 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a10
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr8_agpr9, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" store (s64) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr8_agpr9, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-thread-private" store (s64) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.0 + 32, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a10
@@ -3292,48 +3292,48 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a11
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -3368,48 +3368,48 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a11
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -3443,28 +3443,28 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a11
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a11
@@ -3497,12 +3497,12 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a11
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr8_agpr9_agpr10, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" store (s96) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr8_agpr9_agpr10, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-thread-private" store (s96) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s96) from %stack.0 + 32, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a11
@@ -3553,52 +3553,52 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a12
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -3635,52 +3635,52 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a12
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -3716,30 +3716,30 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a12
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a12
@@ -3774,12 +3774,12 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a12
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 32, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a12
@@ -3832,68 +3832,68 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a16
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
     ; MUBUF-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
     ; MUBUF-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
     ; MUBUF-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -3938,68 +3938,68 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a16
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
     ; FLATSCR-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
     ; FLATSCR-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
     ; FLATSCR-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
     ; FLATSCR-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -4043,38 +4043,38 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a16
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a16
@@ -4117,14 +4117,14 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a16
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 48, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a16
@@ -4185,132 +4185,132 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_av_a32
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 64, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 68, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 72, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 76, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 80, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 84, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 88, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 92, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 96, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 100, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 104, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 108, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 112, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 116, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 120, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 124, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
     ; MUBUF-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
     ; MUBUF-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
     ; MUBUF-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 64, addrspace 5)
     ; MUBUF-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 68, addrspace 5)
     ; MUBUF-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 72, addrspace 5)
     ; MUBUF-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 76, addrspace 5)
     ; MUBUF-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 80, addrspace 5)
     ; MUBUF-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 84, addrspace 5)
     ; MUBUF-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 88, addrspace 5)
     ; MUBUF-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 92, addrspace 5)
     ; MUBUF-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 96, addrspace 5)
     ; MUBUF-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 100, addrspace 5)
     ; MUBUF-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 104, addrspace 5)
     ; MUBUF-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 108, addrspace 5)
     ; MUBUF-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 112, addrspace 5)
     ; MUBUF-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 116, addrspace 5)
     ; MUBUF-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 120, addrspace 5)
     ; MUBUF-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 124, addrspace 5)
     ; MUBUF-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -4387,132 +4387,132 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_av_a32
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 64, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 68, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 72, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 76, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 80, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 84, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 88, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 92, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 96, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 100, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 104, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 108, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 112, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 116, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 120, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 124, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
     ; FLATSCR-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
     ; FLATSCR-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
     ; FLATSCR-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
     ; FLATSCR-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 64, addrspace 5)
     ; FLATSCR-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 68, addrspace 5)
     ; FLATSCR-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 72, addrspace 5)
     ; FLATSCR-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 76, addrspace 5)
     ; FLATSCR-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 80, addrspace 5)
     ; FLATSCR-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 84, addrspace 5)
     ; FLATSCR-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 88, addrspace 5)
     ; FLATSCR-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 92, addrspace 5)
     ; FLATSCR-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 96, addrspace 5)
     ; FLATSCR-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 100, addrspace 5)
     ; FLATSCR-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 104, addrspace 5)
     ; FLATSCR-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 108, addrspace 5)
     ; FLATSCR-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 112, addrspace 5)
     ; FLATSCR-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 116, addrspace 5)
     ; FLATSCR-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 120, addrspace 5)
     ; FLATSCR-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 124, addrspace 5)
     ; FLATSCR-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -4588,70 +4588,70 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_av_a32
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 64, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 68, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 72, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 76, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 80, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 84, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 88, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 92, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 96, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 100, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 104, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 108, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 112, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 116, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 120, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 124, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 64, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 68, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 72, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 76, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 80, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 84, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 88, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 92, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 96, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 100, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 104, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 108, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 112, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 116, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 120, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 124, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_av_a32
@@ -4726,22 +4726,22 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_av_a32
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr16_agpr17_agpr18_agpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr20_agpr21_agpr22_agpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr24_agpr25_agpr26_agpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr28_agpr29_agpr30_agpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 112, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr16_agpr17_agpr18_agpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr20_agpr21_agpr22_agpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr24_agpr25_agpr26_agpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr28_agpr29_agpr30_agpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr16_agpr17_agpr18_agpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr20_agpr21_agpr22_agpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr24_agpr25_agpr26_agpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr28_agpr29_agpr30_agpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr16_agpr17_agpr18_agpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr20_agpr21_agpr22_agpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr24_agpr25_agpr26_agpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr28_agpr29_agpr30_agpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 112, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_av_a32
diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
index 5d59878a04ba5..4b25392aaa797 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill-partial-agpr.mir
@@ -60,9 +60,9 @@ body:             |
     ; MUBUF-V2A: liveins: $agpr0
     ; MUBUF-V2A-NEXT: {{  $}}
     ; MUBUF-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit killed $vgpr0_vgpr1
-    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1
     ; MUBUF-V2A-NEXT: S_ENDPGM 0
     ;
@@ -71,9 +71,9 @@ body:             |
     ; FLATSCR-V2A-NEXT: {{  $}}
     ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = IMPLICIT_DEF
     ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1
-    ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-V2A-NEXT: S_ENDPGM 0
     $vgpr0_vgpr1 = IMPLICIT_DEF
     SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, align 4, addrspace 5)
@@ -97,11 +97,11 @@ body:             |
     ; MUBUF-V2A: liveins: $agpr0
     ; MUBUF-V2A-NEXT: {{  $}}
     ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2
-    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2
     ; MUBUF-V2A-NEXT: S_ENDPGM 0
     ;
@@ -110,9 +110,9 @@ body:             |
     ; FLATSCR-V2A-NEXT: {{  $}}
     ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
     ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2
-    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" load (s64) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: S_ENDPGM 0
     $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
     SI_SPILL_V96_SAVE killed $vgpr0_vgpr1_vgpr2, %stack.0, $sgpr32, 0, implicit $exec :: (store (s96) into %stack.0, align 4, addrspace 5)
@@ -136,11 +136,11 @@ body:             |
     ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2
     ; MUBUF-V2A-NEXT: {{  $}}
     ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3
-    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
@@ -153,11 +153,11 @@ body:             |
     ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
     ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-V2A-NEXT: S_ENDPGM 0
     $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5)
@@ -181,13 +181,13 @@ body:             |
     ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2
     ; MUBUF-V2A-NEXT: {{  $}}
     ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
-    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
@@ -199,11 +199,11 @@ body:             |
     ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
     ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
     ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
     ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
     ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
-    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" load (s64) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4
     ; FLATSCR-V2A-NEXT: S_ENDPGM 0
     $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
@@ -228,13 +228,13 @@ body:             |
     ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
     ; MUBUF-V2A-NEXT: {{  $}}
     ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
-    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
@@ -249,13 +249,13 @@ body:             |
     ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     ; FLATSCR-V2A-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     ; FLATSCR-V2A-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     ; FLATSCR-V2A-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
-    ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     ; FLATSCR-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5
     ; FLATSCR-V2A-NEXT: S_ENDPGM 0
@@ -281,18 +281,18 @@ body:             |
     ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3
     ; MUBUF-V2A-NEXT: {{  $}}
     ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-V2A-NEXT: $vgpr4 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec
@@ -303,12 +303,12 @@ body:             |
     ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3
     ; FLATSCR-V2A-NEXT: {{  $}}
     ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr7, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr6, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr5, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr4, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
-    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $vgpr7 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; FLATSCR-V2A-NEXT: $vgpr6 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
     ; FLATSCR-V2A-NEXT: $vgpr5 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7
@@ -336,33 +336,33 @@ body:             |
     ; MUBUF-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
     ; MUBUF-V2A-NEXT: {{  $}}
     ; MUBUF-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-V2A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
     ; MUBUF-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec
     ; MUBUF-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-V2A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-V2A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
     ; MUBUF-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr12 = V_ACCVGPR_READ_B32_e64 $agpr3, implicit $exec
     ; MUBUF-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec
@@ -374,18 +374,18 @@ body:             |
     ; FLATSCR-V2A: liveins: $agpr0, $agpr1, $agpr2, $agpr3, $agpr4
     ; FLATSCR-V2A-NEXT: {{  $}}
     ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr11, implicit $exec
-    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr8_vgpr9_vgpr10, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr8_vgpr9_vgpr10, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s96) into %stack.0 + 32, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr15, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; FLATSCR-V2A-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr14, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; FLATSCR-V2A-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr13, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; FLATSCR-V2A-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr12, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
-    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-V2A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $vgpr11 = V_ACCVGPR_READ_B32_e64 $agpr4, implicit $exec
-    ; FLATSCR-V2A-NEXT: $vgpr8_vgpr9_vgpr10 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-V2A-NEXT: $vgpr8_vgpr9_vgpr10 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s96) from %stack.0 + 32, align 4, addrspace 5)
     ; FLATSCR-V2A-NEXT: $vgpr15 = V_ACCVGPR_READ_B32_e64 $agpr0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; FLATSCR-V2A-NEXT: $vgpr14 = V_ACCVGPR_READ_B32_e64 $agpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
     ; FLATSCR-V2A-NEXT: $vgpr13 = V_ACCVGPR_READ_B32_e64 $agpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15
diff --git a/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir
index c807342bb3951..8c6283dc2dbc5 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-build-spill.mir
@@ -21,8 +21,8 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v1
     ; MUBUF: $vgpr0 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v1
@@ -35,8 +35,8 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v1
     ; FLATSCR: $vgpr0 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v1
@@ -49,8 +49,8 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v1
     ; MUBUF-GFX90A: $vgpr0 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v1
@@ -63,8 +63,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v1
     ; FLATSCR-GFX90A: $vgpr0 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v1
@@ -94,10 +94,10 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v2
     ; MUBUF: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v2
@@ -112,8 +112,8 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v2
     ; FLATSCR: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v2
@@ -128,10 +128,10 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v2
     ; MUBUF-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v2
@@ -146,8 +146,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v2
     ; FLATSCR-GFX90A: $vgpr0_vgpr1 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v2
@@ -179,12 +179,12 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v3
     ; MUBUF: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v3
@@ -201,8 +201,8 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v3
     ; FLATSCR: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s96) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s96) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v3
@@ -219,12 +219,12 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v3
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v3
@@ -241,8 +241,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v3
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s96) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s96) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v3
@@ -276,14 +276,14 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v4
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v4
@@ -302,8 +302,8 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v4
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v4
@@ -322,14 +322,14 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v4
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v4
@@ -348,8 +348,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v4
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v4
@@ -385,16 +385,16 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v5
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v5
@@ -415,10 +415,10 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v5
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v5
@@ -439,16 +439,16 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v5
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v5
@@ -469,10 +469,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v5
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v5
@@ -510,18 +510,18 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v6
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v6
@@ -544,10 +544,10 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v6
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s64) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s64) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v6
@@ -570,18 +570,18 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v6
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v6
@@ -604,10 +604,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v6
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" store (s64) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr4_vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" store (s64) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v6
@@ -647,22 +647,22 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v8
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v8
@@ -689,10 +689,10 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v8
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v8
@@ -719,22 +719,22 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v8
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v8
@@ -761,10 +761,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v8
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v8
@@ -808,38 +808,38 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v16
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v16
@@ -882,14 +882,14 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v16
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 48, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v16
@@ -932,38 +932,38 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v16
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v16
@@ -1006,14 +1006,14 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v16
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 48, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v16
@@ -1073,70 +1073,70 @@ body:             |
   bb.0.entry:
     ; MUBUF-LABEL: name: test_spill_v32
     ; MUBUF: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
-    ; MUBUF-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
-    ; MUBUF-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
-    ; MUBUF-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
-    ; MUBUF-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
-    ; MUBUF-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
-    ; MUBUF-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
-    ; MUBUF-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
-    ; MUBUF-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
-    ; MUBUF-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
-    ; MUBUF-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
-    ; MUBUF-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
-    ; MUBUF-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
-    ; MUBUF-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
-    ; MUBUF-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
-    ; MUBUF-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
-    ; MUBUF-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 64, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 68, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 72, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 76, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 80, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 84, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 88, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 92, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 96, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 100, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 104, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 108, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 112, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 116, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 120, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 124, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 64, addrspace 5)
+    ; MUBUF-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 68, addrspace 5)
+    ; MUBUF-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 72, addrspace 5)
+    ; MUBUF-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 76, addrspace 5)
+    ; MUBUF-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 80, addrspace 5)
+    ; MUBUF-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 84, addrspace 5)
+    ; MUBUF-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 88, addrspace 5)
+    ; MUBUF-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 92, addrspace 5)
+    ; MUBUF-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 96, addrspace 5)
+    ; MUBUF-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 100, addrspace 5)
+    ; MUBUF-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 104, addrspace 5)
+    ; MUBUF-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 108, addrspace 5)
+    ; MUBUF-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 112, addrspace 5)
+    ; MUBUF-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 116, addrspace 5)
+    ; MUBUF-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 120, addrspace 5)
+    ; MUBUF-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 124, addrspace 5)
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-V2A-LABEL: name: test_spill_v32
@@ -1211,22 +1211,22 @@ body:             |
     ;
     ; FLATSCR-LABEL: name: test_spill_v32
     ; FLATSCR: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 112, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 112, align 4, addrspace 5)
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-V2A-LABEL: name: test_spill_v32
@@ -1301,70 +1301,70 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_v32
     ; MUBUF-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 64, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 68, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 72, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 76, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 80, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 84, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 88, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 92, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 96, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 100, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 104, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 108, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 112, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 116, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 120, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 124, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 64, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 68, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 72, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 76, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 80, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 84, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 88, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 92, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 96, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 100, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 104, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 108, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 112, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 116, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 120, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 124, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_v32
@@ -1439,22 +1439,22 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_v32
     ; FLATSCR-GFX90A: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 112, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr0_vgpr1_vgpr2_vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr4_vgpr5_vgpr6_vgpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr8_vgpr9_vgpr10_vgpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr12_vgpr13_vgpr14_vgpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr16_vgpr17_vgpr18_vgpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr24_vgpr25_vgpr26_vgpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr28_vgpr29_vgpr30_vgpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr0_vgpr1_vgpr2_vgpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr4_vgpr5_vgpr6_vgpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr8_vgpr9_vgpr10_vgpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr12_vgpr13_vgpr14_vgpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr16_vgpr17_vgpr18_vgpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr20_vgpr21_vgpr22_vgpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr24_vgpr25_vgpr26_vgpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $vgpr28_vgpr29_vgpr30_vgpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 112, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_v32
@@ -1547,8 +1547,8 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a1
     ; MUBUF: $agpr0 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -1563,8 +1563,8 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a1
     ; FLATSCR: $agpr0 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -1578,8 +1578,8 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a1
     ; MUBUF-GFX90A: $agpr0 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a1
@@ -1592,8 +1592,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a1
     ; FLATSCR-GFX90A: $agpr0 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a1
@@ -1624,12 +1624,12 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a2
     ; MUBUF: $agpr0_agpr1 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -1646,12 +1646,12 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a2
     ; FLATSCR: $agpr0_agpr1 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -1667,10 +1667,10 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a2
     ; MUBUF-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a2
@@ -1685,8 +1685,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a2
     ; FLATSCR-GFX90A: $agpr0_agpr1 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a2
@@ -1719,16 +1719,16 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a3
     ; MUBUF: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -1747,16 +1747,16 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a3
     ; FLATSCR: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -1774,12 +1774,12 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a3
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2, implicit $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a3
@@ -1796,8 +1796,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a3
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s96) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s96) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a3
@@ -1832,20 +1832,20 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a4
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -1866,20 +1866,20 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a4
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -1899,14 +1899,14 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a4
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a4
@@ -1925,8 +1925,8 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a4
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a4
@@ -1963,24 +1963,24 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a5
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2003,24 +2003,24 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a5
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2042,16 +2042,16 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a5
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a5
@@ -2072,10 +2072,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a5
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4, implicit $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr4, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a5
@@ -2114,28 +2114,28 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a6
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2160,28 +2160,28 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a6
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2205,18 +2205,18 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a6
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a6
@@ -2239,10 +2239,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a6
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr4_agpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" store (s64) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr4_agpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" store (s64) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a6
@@ -2283,36 +2283,36 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a8
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2341,36 +2341,36 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a8
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2398,22 +2398,22 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a8
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a8
@@ -2440,10 +2440,10 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a8
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a8
@@ -2488,68 +2488,68 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a16
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
     ; MUBUF-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
     ; MUBUF-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
     ; MUBUF-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -2594,68 +2594,68 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a16
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
     ; FLATSCR-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
     ; FLATSCR-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
     ; FLATSCR-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
     ; FLATSCR-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -2699,38 +2699,38 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a16
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a16
@@ -2773,14 +2773,14 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a16
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 48, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a16
@@ -2841,132 +2841,132 @@ body:             |
     ; MUBUF-LABEL: name: test_spill_a32
     ; MUBUF: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 64, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 68, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 72, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 76, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 80, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 84, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 88, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 92, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 96, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 100, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 104, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 108, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 112, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 116, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 120, addrspace 5)
     ; MUBUF-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 124, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; MUBUF-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; MUBUF-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; MUBUF-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; MUBUF-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; MUBUF-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; MUBUF-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; MUBUF-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; MUBUF-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; MUBUF-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
     ; MUBUF-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
     ; MUBUF-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
     ; MUBUF-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
     ; MUBUF-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
     ; MUBUF-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
     ; MUBUF-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
     ; MUBUF-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 64, addrspace 5)
     ; MUBUF-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 68, addrspace 5)
     ; MUBUF-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 72, addrspace 5)
     ; MUBUF-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 76, addrspace 5)
     ; MUBUF-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 80, addrspace 5)
     ; MUBUF-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 84, addrspace 5)
     ; MUBUF-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 88, addrspace 5)
     ; MUBUF-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 92, addrspace 5)
     ; MUBUF-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 96, addrspace 5)
     ; MUBUF-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 100, addrspace 5)
     ; MUBUF-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 104, addrspace 5)
     ; MUBUF-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 108, addrspace 5)
     ; MUBUF-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 112, addrspace 5)
     ; MUBUF-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 116, addrspace 5)
     ; MUBUF-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 120, addrspace 5)
     ; MUBUF-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
+    ; MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 124, addrspace 5)
     ; MUBUF-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; MUBUF-NEXT: S_ENDPGM 0
     ;
@@ -3043,132 +3043,132 @@ body:             |
     ; FLATSCR-LABEL: name: test_spill_a32
     ; FLATSCR: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr4, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr5, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr6, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr7, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr8, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr9, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr10, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr11, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr12, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr13, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr14, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr15, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr16, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 64, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr17, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 68, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr18, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 72, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr19, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 76, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr20, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 80, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr21, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 84, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr22, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 88, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr23, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 92, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr24, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 96, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr25, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 100, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr26, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 104, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr27, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 108, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr28, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 112, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr29, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 116, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr30, implicit $exec
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0 + 120, addrspace 5)
     ; FLATSCR-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr31, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 124, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 124, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; FLATSCR-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
     ; FLATSCR-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
     ; FLATSCR-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
     ; FLATSCR-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
     ; FLATSCR-NEXT: $agpr4 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
     ; FLATSCR-NEXT: $agpr5 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
     ; FLATSCR-NEXT: $agpr6 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
     ; FLATSCR-NEXT: $agpr7 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
     ; FLATSCR-NEXT: $agpr8 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 36, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
     ; FLATSCR-NEXT: $agpr9 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 40, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
     ; FLATSCR-NEXT: $agpr10 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
     ; FLATSCR-NEXT: $agpr11 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
     ; FLATSCR-NEXT: $agpr12 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 52, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
     ; FLATSCR-NEXT: $agpr13 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 56, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
     ; FLATSCR-NEXT: $agpr14 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 60, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
     ; FLATSCR-NEXT: $agpr15 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 64, addrspace 5)
     ; FLATSCR-NEXT: $agpr16 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 68, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 68, addrspace 5)
     ; FLATSCR-NEXT: $agpr17 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 72, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 72, addrspace 5)
     ; FLATSCR-NEXT: $agpr18 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 76, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 76, addrspace 5)
     ; FLATSCR-NEXT: $agpr19 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 80, addrspace 5)
     ; FLATSCR-NEXT: $agpr20 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 84, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 84, addrspace 5)
     ; FLATSCR-NEXT: $agpr21 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 88, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 88, addrspace 5)
     ; FLATSCR-NEXT: $agpr22 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 92, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 92, addrspace 5)
     ; FLATSCR-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 96, addrspace 5)
     ; FLATSCR-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 100, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 100, addrspace 5)
     ; FLATSCR-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 104, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 104, addrspace 5)
     ; FLATSCR-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 108, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 108, addrspace 5)
     ; FLATSCR-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 112, addrspace 5)
     ; FLATSCR-NEXT: $agpr28 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 116, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 116, addrspace 5)
     ; FLATSCR-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 120, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 120, addrspace 5)
     ; FLATSCR-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
+    ; FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 124, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0 + 124, addrspace 5)
     ; FLATSCR-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
     ; FLATSCR-NEXT: S_ENDPGM 0
     ;
@@ -3244,70 +3244,70 @@ body:             |
     ;
     ; MUBUF-GFX90A-LABEL: name: test_spill_a32
     ; MUBUF-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 64, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 68, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 72, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 76, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 80, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 84, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 88, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 92, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 96, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 100, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 104, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 108, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 112, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 116, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 120, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 124, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 4, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 8, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 12, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 16, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 20, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 24, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 28, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 32, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 36, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 40, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 44, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 48, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 52, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 56, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 60, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 64, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 68, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 72, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 76, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 80, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 84, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 88, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 92, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 96, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 100, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 104, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 108, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 112, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 116, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 120, addrspace 5)
-    ; MUBUF-GFX90A-NEXT: $agpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0 + 124, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr4, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr5, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr6, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr7, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr8, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr9, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr10, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr11, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr12, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr13, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr14, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr15, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr16, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 64, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr17, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 68, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr18, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 72, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr19, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 76, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr20, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 80, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr21, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 84, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr22, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 88, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr23, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 92, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr24, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 96, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr25, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 100, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr26, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 104, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr27, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 108, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr28, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 112, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr29, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 116, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr30, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 120, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: BUFFER_STORE_DWORD_OFFSET killed $agpr31, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 124, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 4, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 8, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 12, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 16, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 20, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 24, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 28, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 32, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 36, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 40, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 44, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 48, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 52, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 56, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 60, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 64, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 68, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 72, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 76, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 80, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 84, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 88, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 92, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 96, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 100, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 104, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 108, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 112, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 116, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 120, addrspace 5)
+    ; MUBUF-GFX90A-NEXT: $agpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0 + 124, addrspace 5)
     ; MUBUF-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; MUBUF-GFX90A-V2A-LABEL: name: test_spill_a32
@@ -3382,22 +3382,22 @@ body:             |
     ;
     ; FLATSCR-GFX90A-LABEL: name: test_spill_a32
     ; FLATSCR-GFX90A: $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 = IMPLICIT_DEF
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr16_agpr17_agpr18_agpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr20_agpr21_agpr22_agpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr24_agpr25_agpr26_agpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr28_agpr29_agpr30_agpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" store (s128) into %stack.0 + 112, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-non-volatile" load (s128) from %stack.0, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 16, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 32, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 48, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr16_agpr17_agpr18_agpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 64, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr20_agpr21_agpr22_agpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 80, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr24_agpr25_agpr26_agpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 96, align 4, addrspace 5)
-    ; FLATSCR-GFX90A-NEXT: $agpr28_agpr29_agpr30_agpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr0_agpr1_agpr2_agpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" store (s128) into %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr4_agpr5_agpr6_agpr7, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr8_agpr9_agpr10_agpr11, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr12_agpr13_agpr14_agpr15, $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr16_agpr17_agpr18_agpr19, $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr20_agpr21_agpr22_agpr23, $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr24_agpr25_agpr26_agpr27, $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $agpr28_agpr29_agpr30_agpr31, $sgpr32, 112, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" store (s128) into %stack.0 + 112, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr0_agpr1_agpr2_agpr3 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15_agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23_agpr24_agpr25_agpr26_agpr27_agpr28_agpr29_agpr30_agpr31 :: ("amdgpu-thread-private" load (s128) from %stack.0, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr4_agpr5_agpr6_agpr7 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 16, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr8_agpr9_agpr10_agpr11 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 32, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr12_agpr13_agpr14_agpr15 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 48, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 48, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr16_agpr17_agpr18_agpr19 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 64, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr20_agpr21_agpr22_agpr23 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 80, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 80, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr24_agpr25_agpr26_agpr27 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 96, align 4, addrspace 5)
+    ; FLATSCR-GFX90A-NEXT: $agpr28_agpr29_agpr30_agpr31 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 112, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.0 + 112, align 4, addrspace 5)
     ; FLATSCR-GFX90A-NEXT: S_ENDPGM 0
     ;
     ; FLATSCR-GFX90A-V2A-LABEL: name: test_spill_a32
diff --git a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
index 374dd2fea0270..17b3e506f5300 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-reg-scavenger-position.mir
@@ -30,14 +30,14 @@ body:             |
   ; CHECK-NEXT:   $sgpr0 = S_ADD_U32 $sgpr0, $sgpr4, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT:   $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT:   $sgpr4 = S_MOV_B32 524288
-  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, align 8192, addrspace 5)
+  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, align 8192, addrspace 5)
   ; CHECK-NEXT:   S_BRANCH %bb.1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT: bb.1:
   ; CHECK-NEXT:   liveins: $sgpr0_sgpr1_sgpr2_sgpr3
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $sgpr4 = S_MOV_B32 524288
-  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, align 8192, addrspace 5)
+  ; CHECK-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr4, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, align 8192, addrspace 5)
   ; CHECK-NEXT:   S_ENDPGM 0, implicit $vgpr0
   bb.0:
     $vgpr0 = SI_SPILL_V32_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
index 8188d51e205f6..1e3158001b6f6 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-scavenge-vgpr-spill.mir
@@ -31,7 +31,7 @@ body:             |
     ; GFX8-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
     ; GFX8-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GFX8-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
-    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; GFX8-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
     ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
     ; GFX8-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
     ; GFX8-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2
@@ -49,7 +49,7 @@ body:             |
     ; GFX8-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
     ; GFX8-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GFX8-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
-    ; GFX8-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; GFX8-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
     ; GFX8-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
     ; GFX8-NEXT: $sgpr33 = COPY $sgpr4
     ; GFX8-NEXT: S_ENDPGM 0, amdgpu_allvgprs
@@ -62,7 +62,7 @@ body:             |
     ; GFX9-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294443008, implicit-def dead $scc
     ; GFX9-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GFX9-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
     ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
     ; GFX9-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
     ; GFX9-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2
@@ -78,7 +78,7 @@ body:             |
     ; GFX9-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
     ; GFX9-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GFX9-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 1048832, implicit-def dead $scc
-    ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; GFX9-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, killed $sgpr5, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
     ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
     ; GFX9-NEXT: $sgpr33 = COPY $sgpr4
     ; GFX9-NEXT: S_ENDPGM 0, amdgpu_allvgprs
@@ -91,7 +91,7 @@ body:             |
     ; GFX9-FLATSCR-NEXT: $sgpr33 = frame-setup S_AND_B32 killed $sgpr33, 4294959104, implicit-def dead $scc
     ; GFX9-FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GFX9-FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc
-    ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; GFX9-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
     ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
     ; GFX9-FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr2
     ; GFX9-FLATSCR-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr34, 1, undef $vgpr2
@@ -106,7 +106,7 @@ body:             |
     ; GFX9-FLATSCR-NEXT: $sgpr34 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 1
     ; GFX9-FLATSCR-NEXT: $sgpr6_sgpr7 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
     ; GFX9-FLATSCR-NEXT: $sgpr5 = S_ADD_I32 $sgpr33, 16388, implicit-def dead $scc
-    ; GFX9-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; GFX9-FLATSCR-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr5, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
     ; GFX9-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7
     ; GFX9-FLATSCR-NEXT: $sgpr33 = COPY $sgpr4
     ; GFX9-FLATSCR-NEXT: S_ENDPGM 0, amdgpu_allvgprs
diff --git a/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir b/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir
index a49e509904533..e41fc0c169b90 100644
--- a/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir
+++ b/llvm/test/CodeGen/AMDGPU/pei-vgpr-block-spill-csr.mir
@@ -27,10 +27,10 @@ body: |
     ; CHECK: liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $m0 = S_MOV_B32 9
-    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-thread-private" store (s1024) into %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45
     ; CHECK-NEXT: $m0 = S_MOV_B32 9
-    ; CHECK-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: ("amdgpu-non-volatile" load (s1024) from %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: ("amdgpu-thread-private" load (s1024) from %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
     S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45
     S_SETPC_B64_return $sgpr30_sgpr31
@@ -51,10 +51,10 @@ body: |
     ; CHECK: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $m0 = S_MOV_B32 16711935
-    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-thread-private" store (s1024) into %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def $vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit-def $vgpr65, implicit-def $vgpr66
     ; CHECK-NEXT: $m0 = S_MOV_B32 16711935
-    ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" load (s1024) from %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-thread-private" load (s1024) from %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
     S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr42, implicit-def $vgpr43, implicit-def $vgpr44, implicit-def $vgpr45, implicit-def $vgpr46, implicit-def $vgpr47, implicit-def $vgpr48, implicit-def $vgpr49, implicit-def $vgpr50, implicit-def $vgpr51, implicit-def $vgpr52, implicit-def $vgpr53, implicit-def $vgpr54, implicit-def $vgpr55, implicit-def $vgpr56, implicit-def $vgpr57, implicit-def $vgpr58, implicit-def $vgpr59, implicit-def $vgpr60, implicit-def $vgpr61, implicit-def $vgpr62, implicit-def $vgpr63, implicit-def $vgpr64, implicit-def $vgpr65, implicit-def $vgpr66
     S_SETPC_B64_return $sgpr30_sgpr31
@@ -79,18 +79,18 @@ body: |
     ; CHECK: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $m0 = S_MOV_B32 3
-    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-thread-private" store (s1024) into %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: $m0 = S_MOV_B32 65
-    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.1, align 4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-thread-private" store (s1024) into %stack.1, align 4, addrspace 5)
     ; CHECK-NEXT: $m0 = S_MOV_B32 1
-    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.2, align 4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-thread-private" store (s1024) into %stack.2, align 4, addrspace 5)
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr104, implicit-def $vgpr110, implicit-def $vgpr232
     ; CHECK-NEXT: $m0 = S_MOV_B32 1
-    ; CHECK-NEXT: $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr233, implicit $vgpr234, implicit $vgpr235, implicit $vgpr236, implicit $vgpr237, implicit $vgpr238, implicit $vgpr239, implicit $vgpr248, implicit $vgpr249, implicit $vgpr250, implicit $vgpr251, implicit $vgpr252, implicit $vgpr253, implicit $vgpr254, implicit $vgpr255 :: ("amdgpu-non-volatile" load (s1024) from %stack.2, align 4, addrspace 5)
+    ; CHECK-NEXT: $vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239_vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247_vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255_vgpr256_vgpr257_vgpr258_vgpr259_vgpr260_vgpr261_vgpr262_vgpr263 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr233, implicit $vgpr234, implicit $vgpr235, implicit $vgpr236, implicit $vgpr237, implicit $vgpr238, implicit $vgpr239, implicit $vgpr248, implicit $vgpr249, implicit $vgpr250, implicit $vgpr251, implicit $vgpr252, implicit $vgpr253, implicit $vgpr254, implicit $vgpr255 :: ("amdgpu-thread-private" load (s1024) from %stack.2, align 4, addrspace 5)
     ; CHECK-NEXT: $m0 = S_MOV_B32 65
-    ; CHECK-NEXT: $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr105, implicit $vgpr106, implicit $vgpr107, implicit $vgpr108, implicit $vgpr109, implicit $vgpr111, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127 :: ("amdgpu-non-volatile" load (s1024) from %stack.1, align 4, addrspace 5)
+    ; CHECK-NEXT: $vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111_vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127_vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr105, implicit $vgpr106, implicit $vgpr107, implicit $vgpr108, implicit $vgpr109, implicit $vgpr111, implicit $vgpr120, implicit $vgpr121, implicit $vgpr122, implicit $vgpr123, implicit $vgpr124, implicit $vgpr125, implicit $vgpr126, implicit $vgpr127 :: ("amdgpu-thread-private" load (s1024) from %stack.1, align 4, addrspace 5)
     ; CHECK-NEXT: $m0 = S_MOV_B32 3
-    ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: ("amdgpu-non-volatile" load (s1024) from %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 32, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: ("amdgpu-thread-private" load (s1024) from %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
     S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr104, implicit-def $vgpr110, implicit-def $vgpr232
     S_SETPC_B64_return $sgpr30_sgpr31
@@ -113,14 +113,14 @@ body: |
     ; CHECK: liveins: $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $m0 = S_MOV_B32 7
-    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-thread-private" store (s1024) into %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: $m0 = S_MOV_B32 3
-    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.1, align 4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-thread-private" store (s1024) into %stack.1, align 4, addrspace 5)
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42, implicit-def $vgpr70_vgpr71_vgpr72_vgpr73
     ; CHECK-NEXT: $m0 = S_MOV_B32 3
-    ; CHECK-NEXT: $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr74, implicit $vgpr75, implicit $vgpr76, implicit $vgpr77, implicit $vgpr78, implicit $vgpr79, implicit $vgpr88, implicit $vgpr89, implicit $vgpr90, implicit $vgpr91, implicit $vgpr92, implicit $vgpr93, implicit $vgpr94, implicit $vgpr95 :: ("amdgpu-non-volatile" load (s1024) from %stack.1, align 4, addrspace 5)
+    ; CHECK-NEXT: $vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79_vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95_vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr74, implicit $vgpr75, implicit $vgpr76, implicit $vgpr77, implicit $vgpr78, implicit $vgpr79, implicit $vgpr88, implicit $vgpr89, implicit $vgpr90, implicit $vgpr91, implicit $vgpr92, implicit $vgpr93, implicit $vgpr94, implicit $vgpr95 :: ("amdgpu-thread-private" load (s1024) from %stack.1, align 4, addrspace 5)
     ; CHECK-NEXT: $m0 = S_MOV_B32 7
-    ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: ("amdgpu-non-volatile" load (s1024) from %stack.0, align 4, addrspace 5)
+    ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: ("amdgpu-thread-private" load (s1024) from %stack.0, align 4, addrspace 5)
     ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
     S_NOP 0, implicit-def $vgpr40_vgpr41_vgpr42, implicit-def $vgpr70_vgpr71_vgpr72_vgpr73
     S_SETPC_B64_return $sgpr30_sgpr31
@@ -149,12 +149,12 @@ body: |
     ; CHECK: liveins: $vgpr48, $sgpr30_sgpr31, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $m0 = S_MOV_B32 1
-    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.2, align 4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-thread-private" store (s1024) into %stack.2, align 4, addrspace 5)
     ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr48, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40
     ; CHECK-NEXT: $m0 = S_MOV_B32 1
-    ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: ("amdgpu-non-volatile" load (s1024) from %stack.2, align 4, addrspace 5)
+    ; CHECK-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr43, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: ("amdgpu-thread-private" load (s1024) from %stack.2, align 4, addrspace 5)
     ; CHECK-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
     SCRATCH_STORE_DWORD_SADDR $vgpr48, %stack.0, 0, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.0, addrspace 5)
     SCRATCH_STORE_DWORD_SADDR $vgpr48, %stack.1, 4, 0, implicit $exec, implicit $flat_scr :: (store (s32) into %stack.1, addrspace 5)
@@ -183,22 +183,22 @@ body: |
     ; W32: liveins: $sgpr48, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71
     ; W32-NEXT: {{  $}}
     ; W32-NEXT: $sgpr0 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-    ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-    ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+    ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+    ; W32-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
     ; W32-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0
     ; W32-NEXT: $m0 = S_MOV_B32 9
-    ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.4, align 4, addrspace 5)
+    ; W32-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-thread-private" store (s1024) into %stack.4, align 4, addrspace 5)
     ; W32-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr48, 0, $vgpr44
     ; W32-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec
     ; W32-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40
     ; W32-NEXT: $sgpr48 = SI_RESTORE_S32_FROM_VGPR $vgpr44, 0
     ; W32-NEXT: $m0 = S_MOV_B32 9
-    ; W32-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: ("amdgpu-non-volatile" load (s1024) from %stack.4, align 4, addrspace 5)
+    ; W32-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: ("amdgpu-thread-private" load (s1024) from %stack.4, align 4, addrspace 5)
     ; W32-NEXT: $sgpr0 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; W32-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-    ; W32-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
-    ; W32-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; W32-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+    ; W32-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
+    ; W32-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
     ; W32-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0
     ; W32-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
     ;
@@ -206,22 +206,22 @@ body: |
     ; W64: liveins: $sgpr48, $sgpr30_sgpr31, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40, $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71
     ; W64-NEXT: {{  $}}
     ; W64-NEXT: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-    ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-    ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr41, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+    ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr42, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+    ; W64-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr44, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
     ; W64-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
     ; W64-NEXT: $m0 = S_MOV_B32 9
-    ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.4, align 4, addrspace 5)
+    ; W64-NEXT: SCRATCH_STORE_BLOCK_SADDR $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-thread-private" store (s1024) into %stack.4, align 4, addrspace 5)
     ; W64-NEXT: $vgpr44 = SI_SPILL_S32_TO_VGPR $sgpr48, 0, $vgpr44
     ; W64-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec
     ; W64-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7, implicit $vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40
     ; W64-NEXT: $sgpr48 = SI_RESTORE_S32_FROM_VGPR $vgpr44, 0
     ; W64-NEXT: $m0 = S_MOV_B32 9
-    ; W64-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: ("amdgpu-non-volatile" load (s1024) from %stack.4, align 4, addrspace 5)
+    ; W64-NEXT: $vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr41, implicit $vgpr42, implicit $vgpr44, implicit $vgpr45, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63 :: ("amdgpu-thread-private" load (s1024) from %stack.4, align 4, addrspace 5)
     ; W64-NEXT: $sgpr0_sgpr1 = S_OR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; W64-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-    ; W64-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
-    ; W64-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; W64-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+    ; W64-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
+    ; W64-NEXT: $vgpr44 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
     ; W64-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
     ; W64-NEXT: S_SETPC_B64_return $sgpr30_sgpr31
     S_NOP 0, implicit-def $vgpr40, implicit-def $vgpr41, implicit-def $vgpr43, implicit-def $sgpr22, implicit-def $sgpr48, implicit-def $m0, implicit-def $exec
@@ -261,7 +261,7 @@ body: |
   ; CHECK-NEXT:   liveins: $vgpr44, $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $m0 = S_MOV_B32 11
-  ; CHECK-NEXT:   SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-non-volatile" store (s1024) into %stack.0, align 4, addrspace 5)
+  ; CHECK-NEXT:   SCRATCH_STORE_BLOCK_SADDR $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0 :: ("amdgpu-thread-private" store (s1024) into %stack.0, align 4, addrspace 5)
   ; CHECK-NEXT:   S_NOP 0, implicit-def $vgpr42, implicit-def $vgpr45
   ; CHECK-NEXT:   S_BRANCH %bb.1
   ; CHECK-NEXT: {{  $}}
@@ -276,7 +276,7 @@ body: |
   ; CHECK-NEXT:   liveins: $sgpr30_sgpr31, $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $m0 = S_MOV_B32 11
-  ; CHECK-NEXT:   $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: ("amdgpu-non-volatile" load (s1024) from %stack.0, align 4, addrspace 5)
+  ; CHECK-NEXT:   $vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47_vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63_vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73 = SCRATCH_LOAD_BLOCK_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $m0, implicit $vgpr44, implicit $vgpr46, implicit $vgpr47, implicit $vgpr56, implicit $vgpr57, implicit $vgpr58, implicit $vgpr59, implicit $vgpr60, implicit $vgpr61, implicit $vgpr62, implicit $vgpr63, implicit $vgpr72, implicit $vgpr73 :: ("amdgpu-thread-private" load (s1024) from %stack.0, align 4, addrspace 5)
   ; CHECK-NEXT:   S_SETPC_B64_return $sgpr30_sgpr31
   bb.0:
     liveins: $sgpr30_sgpr31, $vgpr44
diff --git a/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir b/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir
index 8a6813039f670..35f1a5f74afc1 100644
--- a/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/regalloc-introduces-copy-sgpr-to-agpr.mir
@@ -39,7 +39,7 @@ body:             |
     ; GFX908-NEXT: $sgpr0 = S_ADD_U32 $sgpr0, $sgpr7, implicit-def $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX908-NEXT: $sgpr1 = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr0_sgpr1_sgpr2_sgpr3
     ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec
     ; GFX908-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr34, implicit $exec, implicit $exec
     ; GFX908-NEXT: renamable $vgpr34 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec
@@ -148,74 +148,74 @@ body:             |
     ; GFX908-NEXT: $vgpr35 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
     ; GFX908-NEXT: $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr35, implicit $exec, implicit $exec
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr5, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr6, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr7, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr8, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr9, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.5, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr10, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.6, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr11, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.7, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr12, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.8, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr13, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr14, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.10, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.10, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr15, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.11, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.11, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr16, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.12, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.12, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr17, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.13, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.13, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr18, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.14, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.14, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr19, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.15, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.15, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr20, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.16, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.16, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr21, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.17, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.17, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr22, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.18, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.18, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr23, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.19, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.19, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr24, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.20, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.20, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr25, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.21, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.21, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr26, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.22, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.22, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr27, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.23, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.23, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr28, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.24, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.24, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr29, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.25, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.25, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr30, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.26, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.26, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr31, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.27, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.27, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr34, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.28, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.28, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr35, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.29, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.29, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr36, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.30, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.30, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr37, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.31, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.31, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr38, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.32, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 128, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.32, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr39, implicit $exec, implicit $exec
-    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.33, addrspace 5)
+    ; GFX908-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr34, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.33, addrspace 5)
     ; GFX908-NEXT: $vgpr34 = V_MOV_B32_e32 killed $sgpr40, implicit $exec, implicit $exec
     ; GFX908-NEXT: S_NOP 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19, implicit $vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27, implicit $vgpr28_vgpr29_vgpr30_vgpr31_vgpr32_vgpr33, implicit $vgpr35
-    ; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GFX908-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec
     ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $exec
     ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec
@@ -287,39 +287,39 @@ body:             |
     ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec
     ; GFX908-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr35, implicit $exec, implicit $exec
     ; GFX908-NEXT: GLOBAL_STORE_DWORD undef $vgpr0_vgpr1, killed renamable $vgpr0, 0, 0, implicit $exec
-    ; GFX908-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-    ; GFX908-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
-    ; GFX908-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 12, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
-    ; GFX908-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
-    ; GFX908-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 20, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
-    ; GFX908-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 24, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
-    ; GFX908-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
-    ; GFX908-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 32, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, addrspace 5)
-    ; GFX908-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 36, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
-    ; GFX908-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 40, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.10, addrspace 5)
-    ; GFX908-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.11, addrspace 5)
-    ; GFX908-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 48, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.12, addrspace 5)
-    ; GFX908-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 52, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.13, addrspace 5)
-    ; GFX908-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 56, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.14, addrspace 5)
-    ; GFX908-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 60, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.15, addrspace 5)
-    ; GFX908-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.16, addrspace 5)
-    ; GFX908-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 68, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.17, addrspace 5)
-    ; GFX908-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 72, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.18, addrspace 5)
-    ; GFX908-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 76, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.19, addrspace 5)
-    ; GFX908-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 80, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.20, addrspace 5)
-    ; GFX908-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 84, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.21, addrspace 5)
-    ; GFX908-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 88, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.22, addrspace 5)
-    ; GFX908-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 92, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.23, addrspace 5)
-    ; GFX908-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.24, addrspace 5)
-    ; GFX908-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 100, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.25, addrspace 5)
-    ; GFX908-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 104, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.26, addrspace 5)
-    ; GFX908-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 108, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.27, addrspace 5)
-    ; GFX908-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 112, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.28, addrspace 5)
-    ; GFX908-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 116, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.29, addrspace 5)
-    ; GFX908-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 120, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.30, addrspace 5)
-    ; GFX908-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 124, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.31, addrspace 5)
-    ; GFX908-NEXT: $vgpr32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 128, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.32, addrspace 5)
-    ; GFX908-NEXT: $vgpr33 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 132, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.33, addrspace 5)
+    ; GFX908-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+    ; GFX908-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
+    ; GFX908-NEXT: $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 12, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
+    ; GFX908-NEXT: $vgpr4 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
+    ; GFX908-NEXT: $vgpr5 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 20, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.5, addrspace 5)
+    ; GFX908-NEXT: $vgpr6 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 24, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.6, addrspace 5)
+    ; GFX908-NEXT: $vgpr7 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.7, addrspace 5)
+    ; GFX908-NEXT: $vgpr8 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 32, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.8, addrspace 5)
+    ; GFX908-NEXT: $vgpr9 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 36, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
+    ; GFX908-NEXT: $vgpr10 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 40, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.10, addrspace 5)
+    ; GFX908-NEXT: $vgpr11 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.11, addrspace 5)
+    ; GFX908-NEXT: $vgpr12 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 48, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.12, addrspace 5)
+    ; GFX908-NEXT: $vgpr13 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 52, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.13, addrspace 5)
+    ; GFX908-NEXT: $vgpr14 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 56, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.14, addrspace 5)
+    ; GFX908-NEXT: $vgpr15 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 60, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.15, addrspace 5)
+    ; GFX908-NEXT: $vgpr16 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.16, addrspace 5)
+    ; GFX908-NEXT: $vgpr17 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 68, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.17, addrspace 5)
+    ; GFX908-NEXT: $vgpr18 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 72, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.18, addrspace 5)
+    ; GFX908-NEXT: $vgpr19 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 76, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.19, addrspace 5)
+    ; GFX908-NEXT: $vgpr20 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 80, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.20, addrspace 5)
+    ; GFX908-NEXT: $vgpr21 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 84, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.21, addrspace 5)
+    ; GFX908-NEXT: $vgpr22 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 88, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.22, addrspace 5)
+    ; GFX908-NEXT: $vgpr23 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 92, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.23, addrspace 5)
+    ; GFX908-NEXT: $vgpr24 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.24, addrspace 5)
+    ; GFX908-NEXT: $vgpr25 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 100, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.25, addrspace 5)
+    ; GFX908-NEXT: $vgpr26 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 104, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.26, addrspace 5)
+    ; GFX908-NEXT: $vgpr27 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 108, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.27, addrspace 5)
+    ; GFX908-NEXT: $vgpr28 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 112, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.28, addrspace 5)
+    ; GFX908-NEXT: $vgpr29 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 116, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.29, addrspace 5)
+    ; GFX908-NEXT: $vgpr30 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 120, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.30, addrspace 5)
+    ; GFX908-NEXT: $vgpr31 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 124, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.31, addrspace 5)
+    ; GFX908-NEXT: $vgpr32 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 128, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.32, addrspace 5)
+    ; GFX908-NEXT: $vgpr33 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, 0, 132, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.33, addrspace 5)
     ; GFX908-NEXT: S_NOP 0, implicit renamable $agpr0, implicit killed renamable $vgpr1, implicit killed renamable $vgpr2, implicit killed renamable $vgpr3, implicit killed renamable $vgpr4, implicit killed renamable $vgpr5, implicit killed renamable $vgpr6, implicit killed renamable $vgpr7, implicit killed renamable $vgpr8, implicit killed renamable $vgpr9, implicit killed renamable $vgpr10, implicit killed renamable $vgpr11, implicit killed renamable $vgpr12, implicit killed renamable $vgpr13, implicit killed renamable $vgpr14, implicit killed renamable $vgpr15, implicit killed renamable $vgpr16, implicit killed renamable $vgpr17, implicit killed renamable $vgpr18, implicit killed renamable $vgpr19, implicit killed renamable $vgpr20, implicit killed renamable $vgpr21, implicit killed renamable $vgpr22, implicit killed renamable $vgpr23, implicit killed renamable $vgpr24, implicit killed renamable $vgpr25, implicit killed renamable $vgpr26, implicit killed renamable $vgpr27, implicit killed renamable $vgpr28, implicit killed renamable $vgpr29, implicit killed renamable $vgpr30, implicit killed renamable $vgpr31, implicit killed renamable $vgpr32, implicit killed renamable $vgpr33, implicit killed renamable $vgpr34
     ; GFX908-NEXT: S_ENDPGM 0, implicit killed renamable $agpr0
     %v0:vgpr_32 = GLOBAL_LOAD_DWORD undef $vgpr0_vgpr1, 0, 0, implicit $exec
diff --git a/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir
index 38db31cf937ef..105da9178b109 100644
--- a/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/same-slot-agpr-sgpr.mir
@@ -20,11 +20,11 @@ body: |
     ; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF
     ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
     ; CHECK-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
     ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr0, implicit $sgpr4_sgpr5
     ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5
     ; CHECK-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr0, implicit $exec
-    ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr0
     ; CHECK-NEXT: S_ENDPGM 0
     $vgpr0 = IMPLICIT_DEF
@@ -53,11 +53,11 @@ body: |
     ; CHECK-NEXT: $sgpr4_sgpr5 = IMPLICIT_DEF
     ; CHECK-NEXT: $sgpr6_sgpr7 = S_MOV_B64 $exec
     ; CHECK-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
     ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, undef $vgpr0, implicit $sgpr4_sgpr5
     ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr5, 1, $vgpr0, implicit killed $sgpr4_sgpr5
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-    ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr6_sgpr7, implicit killed $vgpr0
     ; CHECK-NEXT: S_ENDPGM 0
     $vgpr0 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
index cbc00b7c5b67b..a3f6d36d2e542 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-overlap-wwm-reserve.mir
@@ -33,11 +33,11 @@ body:             |
   ; GCN-NEXT:   $vcc_hi = frame-setup COPY $sgpr33
   ; GCN-NEXT:   $sgpr33 = frame-setup COPY $sgpr32
   ; GCN-NEXT:   $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.69, addrspace 5)
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.70, addrspace 5)
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.71, addrspace 5)
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.72, addrspace 5)
-  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.73, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.69, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.70, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.71, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr4, $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.72, addrspace 5)
+  ; GCN-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr5, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.73, addrspace 5)
   ; GCN-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr0
   ; GCN-NEXT:   $sgpr32 = frame-setup S_ADD_I32 $sgpr32, 24, implicit-def dead $scc
   ; GCN-NEXT:   $vgpr2 = SI_SPILL_S32_TO_VGPR $sgpr4, 0, $vgpr2
@@ -200,11 +200,11 @@ body:             |
   ; GCN-NEXT:   $sgpr4 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0
   ; GCN-NEXT:   $sgpr32 = frame-destroy COPY $sgpr33
   ; GCN-NEXT:   $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; GCN-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.69, addrspace 5)
-  ; GCN-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.70, addrspace 5)
-  ; GCN-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.71, addrspace 5)
-  ; GCN-NEXT:   $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.72, addrspace 5)
-  ; GCN-NEXT:   $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.73, addrspace 5)
+  ; GCN-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.69, addrspace 5)
+  ; GCN-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.70, addrspace 5)
+  ; GCN-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.71, addrspace 5)
+  ; GCN-NEXT:   $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.72, addrspace 5)
+  ; GCN-NEXT:   $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.73, addrspace 5)
   ; GCN-NEXT:   $exec_lo = S_MOV_B32 killed $sgpr0
   ; GCN-NEXT:   $sgpr33 = frame-destroy COPY $vcc_hi
   ; GCN-NEXT:   S_ENDPGM 0
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
index 3e13bdfadea98..346dadb7b15e6 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-to-vmem-scc-clobber.mir
@@ -26,11 +26,11 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1, implicit-def $vgpr0
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr8, 0, undef $vgpr0
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr0
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -76,12 +76,12 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 3, implicit-def $vgpr0
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr8, 0, undef $vgpr0, implicit $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr9, 1, $vgpr0, implicit $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr0
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -125,11 +125,11 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1, implicit-def $vgpr0
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr0
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -173,12 +173,12 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 3, implicit-def $vgpr0
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr0
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
@@ -223,14 +223,14 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr8, 0, undef $vgpr0
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -276,14 +276,14 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -332,15 +332,15 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 3
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr8, 0, undef $vgpr0, implicit $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr9, 1, $vgpr0, implicit $sgpr8_sgpr9
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -386,15 +386,15 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 3
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr8_sgpr9
   ; VMEM-GFX8-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -446,24 +446,24 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr8, 0, undef $vgpr0
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr9, 0, undef $vgpr0
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN killed $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -510,24 +510,24 @@ body:             |
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr8 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr4_sgpr5 = S_MOV_B64 $exec
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 1
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 16392, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $sgpr9 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
   ; VMEM-GFX8-NEXT:   $exec = S_MOV_B64 killed $sgpr4_sgpr5
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
index 370ca8073567b..28d2ba59ebc68 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill-vmem-large-frame.mir
@@ -28,17 +28,17 @@ body:             |
     ; CHECK-NEXT: S_CMP_EQ_U32 0, 0, implicit-def $scc
     ; CHECK-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
     ; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
     ; CHECK-NEXT: $vgpr1 = SI_SPILL_S32_TO_VGPR $sgpr10, 0, undef $vgpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
     ; CHECK-NEXT: $sgpr4_sgpr5 = S_MOV_B64 $exec
     ; CHECK-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-    ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+    ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; CHECK-NEXT: $sgpr10 = SI_RESTORE_S32_FROM_VGPR killed $vgpr1, 0
-    ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; CHECK-NEXT: $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5, implicit killed $vgpr1
     ; CHECK-NEXT: S_SETPC_B64 $sgpr30_sgpr31, implicit $scc
     S_CMP_EQ_U32 0, 0, implicit-def $scc
diff --git a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
index fd5044031f3ef..8596e7d91f0f3 100644
--- a/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/sgpr-spill.mir
@@ -68,74 +68,74 @@ body:             |
     ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 0, undef $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
@@ -144,13 +144,13 @@ body:             |
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.5, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
@@ -167,13 +167,13 @@ body:             |
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.6, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
@@ -206,17 +206,17 @@ body:             |
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.7, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 262144, implicit-def dead $scc
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, align 4096, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.8, align 4096, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ;
     ; GCN32-MUBUF-LABEL: name: check_spill
@@ -232,74 +232,74 @@ body:             |
     ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 0, undef $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 15, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
@@ -308,13 +308,13 @@ body:             |
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.5, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
@@ -331,13 +331,13 @@ body:             |
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.6, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
@@ -370,17 +370,17 @@ body:             |
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.7, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr1 = S_ADD_I32 $sgpr33, 131072, implicit-def dead $scc
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.8, align 4096, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.8, align 4096, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ;
     ; GCN64-FLATSCR-LABEL: name: check_spill
@@ -392,74 +392,74 @@ body:             |
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr12, 0, undef $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit killed $sgpr12_sgpr13
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr15, 3, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr16, 4, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
@@ -468,13 +468,13 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr17, 5, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr18, 6, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr19, 7, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.5, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr13, 1, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr14, 2, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
@@ -491,13 +491,13 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr25, 13, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr26, 14, $vgpr0, implicit $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr27, 15, $vgpr0, implicit killed $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.6, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr64, 0, undef $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr65, 1, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr66, 2, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
@@ -530,17 +530,17 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr93, 29, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr94, 30, $vgpr0, implicit $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr95, 31, $vgpr0, implicit killed $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.7, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: renamable $sgpr12 = IMPLICIT_DEF
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr12, 0, undef $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, killed $sgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.8, align 4096, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, killed $sgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.8, align 4096, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     renamable $sgpr12 = IMPLICIT_DEF
     SI_SPILL_S32_SAVE killed $sgpr12, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
@@ -626,53 +626,53 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr29 = S_ADDC_U32 $sgpr29, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr28_sgpr29_sgpr30_sgpr31
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13
     ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
     ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 2
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
     ; GCN64-MUBUF-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 3
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
     ; GCN64-MUBUF-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
     ; GCN64-MUBUF-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 4
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.5, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -681,12 +681,12 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 5
     ; GCN64-MUBUF-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 6
     ; GCN64-MUBUF-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 7
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.6, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -703,12 +703,12 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 13
     ; GCN64-MUBUF-NEXT: $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 14
     ; GCN64-MUBUF-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 15
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 160, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.7, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-MUBUF-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-MUBUF-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -741,15 +741,15 @@ body:             |
     ; GCN64-MUBUF-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 29
     ; GCN64-MUBUF-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 30
     ; GCN64-MUBUF-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 31
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-MUBUF-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 262144, implicit-def dead $scc
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, align 4096, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, killed $sgpr2, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.8, align 4096, addrspace 5)
     ; GCN64-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr28_sgpr29_sgpr30_sgpr31, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-MUBUF-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ;
     ; GCN32-MUBUF-LABEL: name: check_reload
@@ -764,53 +764,53 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr97 = S_ADDC_U32 $sgpr97, 0, implicit-def dead $scc, implicit $scc, implicit-def $sgpr96_sgpr97_sgpr98_sgpr99
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 3, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13
     ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 7, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 16, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
     ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN32-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 2
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 15, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 28, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN32-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
     ; GCN32-MUBUF-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 3
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 31, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 44, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN32-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
     ; GCN32-MUBUF-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
     ; GCN32-MUBUF-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 4
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 255, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 64, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.5, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN32-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -819,12 +819,12 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 5
     ; GCN32-MUBUF-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 6
     ; GCN32-MUBUF-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 7
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 65535, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 96, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.6, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN32-MUBUF-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN32-MUBUF-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -841,12 +841,12 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 13
     ; GCN32-MUBUF-NEXT: $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 14
     ; GCN32-MUBUF-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 15
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 4294967295, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 160, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.7, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN32-MUBUF-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN32-MUBUF-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -879,15 +879,15 @@ body:             |
     ; GCN32-MUBUF-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 29
     ; GCN32-MUBUF-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 30
     ; GCN32-MUBUF-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 31
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ; GCN32-MUBUF-NEXT: $sgpr0 = S_MOV_B32 $exec_lo
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 1, implicit-def $vgpr0
-    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr1 = S_ADD_I32 $sgpr33, 131072, implicit-def dead $scc
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.8, align 4096, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, killed $sgpr1, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.8, align 4096, addrspace 5)
     ; GCN32-MUBUF-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN32-MUBUF-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN32-MUBUF-NEXT: $exec_lo = S_MOV_B32 killed $sgpr0, implicit killed $vgpr0
     ;
     ; GCN64-FLATSCR-LABEL: name: check_reload
@@ -898,53 +898,53 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $flat_scr_hi = S_ADDC_U32 $sgpr1, 0, implicit-def dead $scc, implicit $scc
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13
     ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 7, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14
     ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-FLATSCR-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 2
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 15, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15
     ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-FLATSCR-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
     ; GCN64-FLATSCR-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 3
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 31, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 44, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16
     ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-FLATSCR-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
     ; GCN64-FLATSCR-NEXT: $sgpr15 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 3
     ; GCN64-FLATSCR-NEXT: $sgpr16 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 4
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 255, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 64, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.5, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19
     ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-FLATSCR-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -953,12 +953,12 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr17 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 5
     ; GCN64-FLATSCR-NEXT: $sgpr18 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 6
     ; GCN64-FLATSCR-NEXT: $sgpr19 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 7
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 65535, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 96, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.6, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27
     ; GCN64-FLATSCR-NEXT: $sgpr13 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-FLATSCR-NEXT: $sgpr14 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -975,12 +975,12 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr25 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 13
     ; GCN64-FLATSCR-NEXT: $sgpr26 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 14
     ; GCN64-FLATSCR-NEXT: $sgpr27 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 15
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 4294967295, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 160, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.7, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr64 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $sgpr64_sgpr65_sgpr66_sgpr67_sgpr68_sgpr69_sgpr70_sgpr71_sgpr72_sgpr73_sgpr74_sgpr75_sgpr76_sgpr77_sgpr78_sgpr79_sgpr80_sgpr81_sgpr82_sgpr83_sgpr84_sgpr85_sgpr86_sgpr87_sgpr88_sgpr89_sgpr90_sgpr91_sgpr92_sgpr93_sgpr94_sgpr95
     ; GCN64-FLATSCR-NEXT: $sgpr65 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 1
     ; GCN64-FLATSCR-NEXT: $sgpr66 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 2
@@ -1013,15 +1013,15 @@ body:             |
     ; GCN64-FLATSCR-NEXT: $sgpr93 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 29
     ; GCN64-FLATSCR-NEXT: $sgpr94 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 30
     ; GCN64-FLATSCR-NEXT: $sgpr95 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 31
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GCN64-FLATSCR-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 1, implicit-def $vgpr0
-    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr2 = S_ADD_I32 $sgpr33, 4096, implicit-def dead $scc
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.8, align 4096, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR killed $sgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.8, align 4096, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $sgpr12 = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 0
-    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.9, addrspace 5)
+    ; GCN64-FLATSCR-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.9, addrspace 5)
     ; GCN64-FLATSCR-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     renamable $sgpr12 = SI_SPILL_S32_RESTORE %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
 
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
index 82a6be138a71c..4b3fb5741dbed 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr-partially-undef.mir
@@ -18,9 +18,9 @@ body:             |
     ; CHECK: liveins: $agpr0_agpr1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
 ...
 
@@ -43,9 +43,9 @@ body:             |
     ; CHECK: liveins: $agpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
 ...
 
@@ -66,9 +66,9 @@ body:             |
     ; CHECK: liveins: $agpr1
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec, implicit-def $agpr0_agpr1, implicit $agpr0_agpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: $vgpr0 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $agpr0_agpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     SI_SPILL_A64_SAVE killed $agpr0_agpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
 ...
 
diff --git a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
index 8f564cbd2a2c1..d791b29ec579d 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-agpr.mir
@@ -255,14 +255,14 @@ body: |
   ; GFX908-EXPANDED-NEXT: {{  $}}
   ; GFX908-EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $agpr0
   ; GFX908-EXPANDED-NEXT:   $vgpr63 = V_ACCVGPR_READ_B32_e64 killed $agpr0, implicit $exec
-  ; GFX908-EXPANDED-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+  ; GFX908-EXPANDED-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr63, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
   ; GFX908-EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; GFX908-EXPANDED-NEXT: {{  $}}
   ; GFX908-EXPANDED-NEXT: bb.1:
   ; GFX908-EXPANDED-NEXT:   successors: %bb.2(0x80000000)
   ; GFX908-EXPANDED-NEXT: {{  $}}
   ; GFX908-EXPANDED-NEXT: bb.2:
-  ; GFX908-EXPANDED-NEXT:   $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+  ; GFX908-EXPANDED-NEXT:   $vgpr63 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
   ; GFX908-EXPANDED-NEXT:   $agpr0 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr63, implicit $exec
   ; GFX908-EXPANDED-NEXT:   S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
   ; GFX908-EXPANDED-NEXT:   S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
@@ -320,14 +320,14 @@ body: |
   ; GFX90A-EXPANDED-NEXT:   liveins: $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251_vgpr252_vgpr253_vgpr254_vgpr255, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, 
$vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239
   ; GFX90A-EXPANDED-NEXT: {{  $}}
   ; GFX90A-EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $agpr0
-  ; GFX90A-EXPANDED-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+  ; GFX90A-EXPANDED-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $agpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
   ; GFX90A-EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; GFX90A-EXPANDED-NEXT: {{  $}}
   ; GFX90A-EXPANDED-NEXT: bb.1:
   ; GFX90A-EXPANDED-NEXT:   successors: %bb.2(0x80000000)
   ; GFX90A-EXPANDED-NEXT: {{  $}}
   ; GFX90A-EXPANDED-NEXT: bb.2:
-  ; GFX90A-EXPANDED-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+  ; GFX90A-EXPANDED-NEXT:   $agpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
   ; GFX90A-EXPANDED-NEXT:   S_NOP 0, implicit undef $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31
   ; GFX90A-EXPANDED-NEXT:   S_NOP 0, implicit undef $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
   ; GFX90A-EXPANDED-NEXT:   S_NOP 0, implicit undef $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63
diff --git a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
index 1b6a3be0d257d..fd6efd536e504 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-reg-tuple-super-reg-use.mir
@@ -23,7 +23,7 @@ body:             |
     ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr8_sgpr9 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9
     ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2
     ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
@@ -33,7 +33,7 @@ body:             |
     ; GCN-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
     ; GCN-NEXT: renamable $sgpr8 = COPY renamable $sgpr1
     ; GCN-NEXT: $sgpr0_sgpr1 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
     ; GCN-NEXT: S_ENDPGM 0, implicit $sgpr8
     renamable $sgpr1 = COPY $sgpr2
@@ -61,7 +61,7 @@ body:             |
     ; GCN: liveins: $sgpr0, $sgpr1, $sgpr2, $sgpr3, $sgpr4, $sgpr5, $sgpr6, $sgpr7, $vgpr0, $vgpr1, $vgpr2, $vgpr3
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr8_sgpr9 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr8_sgpr9
     ; GCN-NEXT: renamable $sgpr1 = COPY $sgpr2
     ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
@@ -70,7 +70,7 @@ body:             |
     ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr2, 2, killed $vgpr0
     ; GCN-NEXT: dead $vgpr0 = SI_SPILL_S32_TO_VGPR $sgpr3, 3, killed $vgpr0, implicit $sgpr0_sgpr1_sgpr2_sgpr3
     ; GCN-NEXT: $sgpr0_sgpr1 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1
     ; GCN-NEXT: S_ENDPGM 0
     renamable $sgpr1 = COPY $sgpr2
@@ -96,10 +96,10 @@ body:             |
     ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr1 = COPY $vgpr2, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; GCN-NEXT: renamable $vgpr8 = COPY $vgpr2, implicit $exec
     ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr8
     renamable $vgpr1 = COPY $vgpr2
@@ -126,10 +126,10 @@ body:             |
     ; GCN: liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: renamable $vgpr1 = COPY $vgpr2, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr2, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr3, $sgpr100_sgpr101_sgpr102_sgpr103, $sgpr32, 12, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0
     renamable $vgpr1 = COPY $vgpr2
     SI_SPILL_V128_SAVE renamable killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, align 4, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/spill-restore-partial-copy.mir b/llvm/test/CodeGen/AMDGPU/spill-restore-partial-copy.mir
index efeaf97365d25..4b4266355b8ef 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-restore-partial-copy.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-restore-partial-copy.mir
@@ -162,7 +162,7 @@ body:             |
     ; GFX950-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit killed $vgpr16_vgpr17_vgpr18_vgpr19
     ; GFX950-NEXT: $agpr26 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr23, implicit $exec, implicit-def $vgpr20_vgpr21_vgpr22_vgpr23, implicit $vgpr20_vgpr21_vgpr22_vgpr23
     ; GFX950-NEXT: $agpr27 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr22, implicit $exec, implicit $vgpr20_vgpr21_vgpr22_vgpr23
-    ; GFX950-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr20_vgpr21, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr20_vgpr21_vgpr22_vgpr23 :: ("amdgpu-non-volatile" store (s64) into %stack.5, align 4, addrspace 5)
+    ; GFX950-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr20_vgpr21, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr20_vgpr21_vgpr22_vgpr23 :: ("amdgpu-thread-private" store (s64) into %stack.5, align 4, addrspace 5)
     ; GFX950-NEXT: $vgpr0 = IMPLICIT_DEF
     ; GFX950-NEXT: $agpr5 = COPY $agpr6, implicit-def $agpr2_agpr3_agpr4_agpr5
     ; GFX950-NEXT: $agpr4 = COPY $agpr7, implicit $agpr2_agpr3_agpr4_agpr5
@@ -191,7 +191,7 @@ body:             |
     ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 4096, 0, implicit $exec
     ; GFX950-NEXT: $agpr5 = COPY $agpr26, implicit-def $agpr2_agpr3_agpr4_agpr5
     ; GFX950-NEXT: $agpr4 = COPY $agpr27, implicit $agpr2_agpr3_agpr4_agpr5
-    ; GFX950-NEXT: $agpr2_agpr3 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr2_agpr3_agpr4_agpr5, implicit $agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-non-volatile" load (s64) from %stack.5, align 4, addrspace 5)
+    ; GFX950-NEXT: $agpr2_agpr3 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr2_agpr3_agpr4_agpr5, implicit $agpr2_agpr3_agpr4_agpr5 :: ("amdgpu-thread-private" load (s64) from %stack.5, align 4, addrspace 5)
     ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 5120, 0, implicit $exec
     ; GFX950-NEXT: S_ENDPGM 0
     renamable $agpr0_agpr1 = IMPLICIT_DEF
@@ -266,7 +266,7 @@ body:             |
     ; GFX950-NEXT: $agpr23 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr18, implicit $exec, implicit $vgpr16_vgpr17_vgpr18_vgpr19
     ; GFX950-NEXT: $agpr24 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr17, implicit $exec, implicit $vgpr16_vgpr17_vgpr18_vgpr19
     ; GFX950-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr16, implicit $exec, implicit killed $vgpr16_vgpr17_vgpr18_vgpr19
-    ; GFX950-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s128) into %stack.5, align 4, addrspace 5)
+    ; GFX950-NEXT: SCRATCH_STORE_DWORDX4_SADDR killed $vgpr20_vgpr21_vgpr22_vgpr23, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s128) into %stack.5, align 4, addrspace 5)
     ; GFX950-NEXT: $vgpr0 = IMPLICIT_DEF
     ; GFX950-NEXT: $agpr5 = COPY $agpr6, implicit-def $agpr2_agpr3_agpr4_agpr5
     ; GFX950-NEXT: $agpr4 = COPY $agpr7, implicit $agpr2_agpr3_agpr4_agpr5
@@ -293,7 +293,7 @@ body:             |
     ; GFX950-NEXT: $agpr3 = COPY $agpr24, implicit $agpr2_agpr3_agpr4_agpr5
     ; GFX950-NEXT: $agpr2 = COPY $agpr25, implicit $agpr2_agpr3_agpr4_agpr5
     ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 4096, 0, implicit $exec
-    ; GFX950-NEXT: $agpr2_agpr3_agpr4_agpr5 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s128) from %stack.5, align 4, addrspace 5)
+    ; GFX950-NEXT: $agpr2_agpr3_agpr4_agpr5 = SCRATCH_LOAD_DWORDX4_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s128) from %stack.5, align 4, addrspace 5)
     ; GFX950-NEXT: DS_WRITE_B128_gfx9 renamable $vgpr0, killed renamable $agpr2_agpr3_agpr4_agpr5, 5120, 0, implicit $exec
     ; GFX950-NEXT: S_ENDPGM 0
     renamable $agpr0_agpr1 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
index 8c6f85cf4a388..7066afbcf9663 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-special-sgpr.mir
@@ -50,28 +50,28 @@ body:             |
     ; GFX9-NEXT: $vcc = IMPLICIT_DEF
     ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
     ; GFX9-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
     ; GFX9-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit $vcc
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GFX9-NEXT: $vcc = IMPLICIT_DEF
     ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
     ; GFX9-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
     ; GFX9-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit killed $vcc
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GFX9-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX9-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GFX9-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; GFX9-NEXT: $vcc_lo = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $vcc
     ; GFX9-NEXT: $vcc_hi = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
-    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GFX9-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr12_sgpr13_sgpr14_sgpr15, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GFX9-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ;
     ; GFX10-LABEL: name: check_vcc
@@ -87,28 +87,28 @@ body:             |
     ; GFX10-NEXT: $vcc = IMPLICIT_DEF
     ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
     ; GFX10-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
     ; GFX10-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit $vcc
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GFX10-NEXT: $vcc = IMPLICIT_DEF
     ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
     ; GFX10-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
     ; GFX10-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit killed $vcc
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GFX10-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX10-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GFX10-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; GFX10-NEXT: $vcc_lo = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $vcc
     ; GFX10-NEXT: $vcc_hi = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
-    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GFX10-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr33, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GFX10-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ;
     ; GFX11-LABEL: name: check_vcc
@@ -118,28 +118,28 @@ body:             |
     ; GFX11-NEXT: $vcc = IMPLICIT_DEF
     ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
     ; GFX11-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
     ; GFX11-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit $vcc
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GFX11-NEXT: $vcc = IMPLICIT_DEF
     ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
     ; GFX11-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_lo, 0, undef $vgpr0, implicit $vcc
     ; GFX11-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR $vcc_hi, 1, $vgpr0, implicit killed $vcc
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     ; GFX11-NEXT: $sgpr0_sgpr1 = S_MOV_B64 $exec
     ; GFX11-NEXT: $exec = S_MOV_B64 3, implicit-def $vgpr0
-    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GFX11-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; GFX11-NEXT: $vcc_lo = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0, implicit-def $vcc
     ; GFX11-NEXT: $vcc_hi = SI_RESTORE_S32_FROM_VGPR killed $vgpr0, 1
-    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GFX11-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr33, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GFX11-NEXT: $exec = S_MOV_B64 killed $sgpr0_sgpr1, implicit killed $vgpr0
     $vcc = IMPLICIT_DEF
     SI_SPILL_S64_SAVE $vcc, %stack.0, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr32
diff --git a/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir b/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir
index c93732a052b4e..90c1b9b44dd1c 100644
--- a/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill-to-agpr-partial.mir
@@ -17,9 +17,9 @@ body:             |
     ; GCN: liveins: $agpr30, $agpr31, $agpr28_agpr29, $agpr24_agpr25_agpr26_agpr27, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s96) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" load (s96) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28_agpr29, implicit $agpr30
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -43,10 +43,10 @@ body:             |
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" load (s64) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28_agpr29
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -71,11 +71,11 @@ body:             |
     ; GCN-NEXT: $agpr29 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $vgpr3 = V_ACCVGPR_READ_B32_e64 $agpr29, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $vgpr2 = V_ACCVGPR_READ_B32_e64 $agpr30, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $vgpr1 = V_ACCVGPR_READ_B32_e64 $agpr31, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25_agpr26_agpr27, implicit $agpr28
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $vgpr0_vgpr1_vgpr2_vgpr3 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -127,9 +127,9 @@ body:             |
     ; GCN: liveins: $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr52_vgpr53, $vgpr48_vgpr49_vgpr50_vgpr51, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s96) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $agpr0_agpr1_agpr2 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" load (s96) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52_vgpr53, implicit $vgpr54
     SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -153,10 +153,10 @@ body:             |
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $agpr0_agpr1 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" load (s64) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52_vgpr53
     SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -181,11 +181,11 @@ body:             |
     ; GCN-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $agpr3 = V_ACCVGPR_WRITE_B32_e64 $vgpr53, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $agpr2 = V_ACCVGPR_WRITE_B32_e64 $vgpr54, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $agpr1 = V_ACCVGPR_WRITE_B32_e64 $vgpr55, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $agpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, implicit $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr52
     SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $agpr0_agpr1_agpr2_agpr3 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir b/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir
index a74b44ac9a31c..834e42eee9685 100644
--- a/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir
+++ b/llvm/test/CodeGen/AMDGPU/spill_kill_v16.mir
@@ -18,9 +18,9 @@ body: |
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
-  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.1, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s16) into %stack.1, align 4, addrspace 5)
   ; EXPANDED-NEXT:   S_NOP 0, implicit renamable $vgpr0_lo16
-  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.0, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s16) into %stack.0, align 4, addrspace 5)
   ; EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.1:
@@ -29,8 +29,8 @@ body: |
   ; EXPANDED-NEXT:   S_NOP 1
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.2:
-  ; EXPANDED-NEXT:   $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.0, align 4, addrspace 5)
-  ; EXPANDED-NEXT:   $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.1, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s16) from %stack.0, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s16) from %stack.1, align 4, addrspace 5)
   ; EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
   ;
   ; SRAMECC-EXPANDED-LABEL: name: spill_restore_vgpr16
@@ -38,9 +38,9 @@ body: |
   ; SRAMECC-EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
-  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.1, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s16) into %stack.1, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit renamable $vgpr0_lo16
-  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.0, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s16) into %stack.0, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT: bb.1:
@@ -49,9 +49,9 @@ body: |
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 1
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT: bb.2:
-  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.0, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s16) from %stack.0, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   $vgpr0_lo16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
-  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.1, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s16) from %stack.1, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   $vgpr0_hi16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
    bb.0:
@@ -86,9 +86,9 @@ body: |
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
-  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.1, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s16) into %stack.1, align 4, addrspace 5)
   ; EXPANDED-NEXT:   S_NOP 0, implicit renamable $vgpr0_lo16
-  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.0, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s16) into %stack.0, align 4, addrspace 5)
   ; EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.1:
@@ -98,8 +98,8 @@ body: |
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.2:
   ; EXPANDED-NEXT:   S_NOP 1
-  ; EXPANDED-NEXT:   $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.0, align 4, addrspace 5)
-  ; EXPANDED-NEXT:   $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.1, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s16) from %stack.0, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s16) from %stack.1, align 4, addrspace 5)
   ; EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
   ;
   ; SRAMECC-EXPANDED-LABEL: name: spill_restore_vgpr16_middle_of_block
@@ -107,9 +107,9 @@ body: |
   ; SRAMECC-EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
-  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.1, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s16) into %stack.1, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit renamable $vgpr0_lo16
-  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.0, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s16) into %stack.0, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT: bb.1:
@@ -119,9 +119,9 @@ body: |
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT: bb.2:
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 1
-  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.0, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s16) from %stack.0, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   $vgpr0_lo16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
-  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.1, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s16) from %stack.1, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   $vgpr0_hi16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
    bb.0:
@@ -157,9 +157,9 @@ body: |
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
-  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.1, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s16) into %stack.1, align 4, addrspace 5)
   ; EXPANDED-NEXT:   S_NOP 0, implicit renamable $vgpr0_lo16
-  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.0, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s16) into %stack.0, align 4, addrspace 5)
   ; EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.1:
@@ -168,17 +168,17 @@ body: |
   ; EXPANDED-NEXT:   S_NOP 1
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.2:
-  ; EXPANDED-NEXT:   $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.0, align 4, addrspace 5)
-  ; EXPANDED-NEXT:   $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.1, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s16) from %stack.0, align 4, addrspace 5)
+  ; EXPANDED-NEXT:   $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s16) from %stack.1, align 4, addrspace 5)
   ;
   ; SRAMECC-EXPANDED-LABEL: name: spill_restore_vgpr16_end_of_block
   ; SRAMECC-EXPANDED: bb.0:
   ; SRAMECC-EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
-  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.1, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s16) into %stack.1, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit renamable $vgpr0_lo16
-  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.0, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s16) into %stack.0, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT: bb.1:
@@ -187,9 +187,9 @@ body: |
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 1
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT: bb.2:
-  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.0, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s16) from %stack.0, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   $vgpr0_lo16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
-  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.1, align 4, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s16) from %stack.1, align 4, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   $vgpr0_hi16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
    bb.0:
      successors: %bb.1(0x80000000)
diff --git a/llvm/test/CodeGen/AMDGPU/spillv16.mir b/llvm/test/CodeGen/AMDGPU/spillv16.mir
index 0c20d7a0bf12d..73c7200cc4496 100644
--- a/llvm/test/CodeGen/AMDGPU/spillv16.mir
+++ b/llvm/test/CodeGen/AMDGPU/spillv16.mir
@@ -34,8 +34,8 @@ body: |
   ; EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
-  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.1, addrspace 5)
-  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.0, addrspace 5)
+  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s16) into %stack.1, addrspace 5)
+  ; EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s16) into %stack.0, addrspace 5)
   ; EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.1:
@@ -44,8 +44,8 @@ body: |
   ; EXPANDED-NEXT:   S_NOP 1
   ; EXPANDED-NEXT: {{  $}}
   ; EXPANDED-NEXT: bb.2:
-  ; EXPANDED-NEXT:   $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.0, addrspace 5)
-  ; EXPANDED-NEXT:   $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.1, addrspace 5)
+  ; EXPANDED-NEXT:   $vgpr0_lo16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s16) from %stack.0, addrspace 5)
+  ; EXPANDED-NEXT:   $vgpr0_hi16 = SCRATCH_LOAD_SHORT_D16_SADDR_t16 $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s16) from %stack.1, addrspace 5)
   ; EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
   ;
   ; SRAMECC-EXPANDED-LABEL: name: spill_restore_vgpr16
@@ -53,8 +53,8 @@ body: |
   ; SRAMECC-EXPANDED-NEXT:   successors: %bb.1(0x80000000)
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit-def renamable $vgpr0_lo16, implicit-def renamable $vgpr0_hi16
-  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.1, addrspace 5)
-  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s16) into %stack.0, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_hi16, $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s16) into %stack.1, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   SCRATCH_STORE_SHORT_SADDR_t16 killed $vgpr0_lo16, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s16) into %stack.0, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   S_CBRANCH_SCC1 %bb.1, implicit undef $scc
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT: bb.1:
@@ -63,9 +63,9 @@ body: |
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 1
   ; SRAMECC-EXPANDED-NEXT: {{  $}}
   ; SRAMECC-EXPANDED-NEXT: bb.2:
-  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.0, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s16) from %stack.0, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   $vgpr0_lo16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
-  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s16) from %stack.1, addrspace 5)
+  ; SRAMECC-EXPANDED-NEXT:   $vgpr1 = SCRATCH_LOAD_USHORT_SADDR $sgpr32, 2, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s16) from %stack.1, addrspace 5)
   ; SRAMECC-EXPANDED-NEXT:   $vgpr0_hi16 = V_MOV_B16_t16_e64 0, killed $vgpr1_lo16, 0, implicit $exec
   ; SRAMECC-EXPANDED-NEXT:   S_NOP 0, implicit killed renamable $vgpr0_lo16, implicit killed renamable $vgpr0_hi16
   bb.0:
diff --git a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
index 98918b987026d..ca19f7e0a55cd 100644
--- a/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
+++ b/llvm/test/CodeGen/AMDGPU/tied-op-for-wwm-scratch-reg-spill-restore.mir
@@ -20,13 +20,13 @@ body:             |
     ; GCN: liveins: $sgpr20, $vgpr1
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
     ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
     ; GCN-NEXT: $vgpr0 = COPY killed renamable $vgpr1, implicit $exec
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: SI_RETURN implicit $vgpr0
     $vgpr0 = IMPLICIT_DEF
@@ -53,8 +53,8 @@ body:             |
     ; GCN: liveins: $sgpr20, $sgpr21, $vgpr1
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
     ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
@@ -62,8 +62,8 @@ body:             |
     ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr21, 0, $vgpr2
     ; GCN-NEXT: $vgpr0 = COPY $vgpr1, implicit $exec
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
     $vgpr0 = IMPLICIT_DEF
@@ -92,13 +92,13 @@ body:             |
     ; GCN: liveins: $sgpr20, $vgpr1
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
     ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2
     ; GCN-NEXT: $vgpr0 = COPY $vgpr1, implicit $exec
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
     $vgpr2 = IMPLICIT_DEF
@@ -124,14 +124,14 @@ body:             |
     ; GCN: liveins: $sgpr20, $vgpr1
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
     ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2
     ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr2, 0, implicit $exec
     ; GCN-NEXT: $vgpr0 = COPY killed $vgpr1, implicit $exec
     ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
     ; GCN-NEXT: SI_RETURN implicit $vgpr0
     $vgpr40 = IMPLICIT_DEF
diff --git a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir
index 66d9b4f2c7114..17730521cbedf 100644
--- a/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir
+++ b/llvm/test/CodeGen/AMDGPU/vector-spill-restore-to-other-vector-type.mir
@@ -20,9 +20,9 @@ body:             |
     ; GCN: liveins: $vgpr52, $vgpr53, $vgpr54, $vgpr55, $agpr0_agpr1_agpr2_agpr3, $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $agpr0_agpr1_agpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s96) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $vgpr51 = COPY $vgpr55, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51
-    ; GCN-NEXT: $vgpr48_vgpr49_vgpr50 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $vgpr48_vgpr49_vgpr50 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: ("amdgpu-thread-private" load (s96) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr54, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -46,10 +46,10 @@ body:             |
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $agpr0_agpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $vgpr51 = COPY $vgpr54, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51
     ; GCN-NEXT: $vgpr50 = COPY $vgpr55, implicit $vgpr48_vgpr49_vgpr50_vgpr51
-    ; GCN-NEXT: $vgpr48_vgpr49 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $vgpr48_vgpr49 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: ("amdgpu-thread-private" load (s64) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr53, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -74,11 +74,11 @@ body:             |
     ; GCN-NEXT: $vgpr53 = V_ACCVGPR_READ_B32_e64 killed $agpr3, implicit $exec, implicit-def $agpr0_agpr1_agpr2_agpr3, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $vgpr54 = V_ACCVGPR_READ_B32_e64 killed $agpr2, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
     ; GCN-NEXT: $vgpr55 = V_ACCVGPR_READ_B32_e64 killed $agpr1, implicit $exec, implicit $agpr0_agpr1_agpr2_agpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $agpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $agpr0_agpr1_agpr2_agpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $vgpr51 = COPY $vgpr53, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51
     ; GCN-NEXT: $vgpr50 = COPY $vgpr54, implicit $vgpr48_vgpr49_vgpr50_vgpr51
     ; GCN-NEXT: $vgpr49 = COPY $vgpr55, implicit $vgpr48_vgpr49_vgpr50_vgpr51
-    ; GCN-NEXT: $vgpr48 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $vgpr48 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $vgpr48_vgpr49_vgpr50_vgpr51, implicit $vgpr48_vgpr49_vgpr50_vgpr51 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $vgpr52, implicit $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, implicit $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, implicit $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47
     SI_SPILL_A128_SAVE killed $agpr0_agpr1_agpr2_agpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $vgpr48_vgpr49_vgpr50_vgpr51 = SI_SPILL_V128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -130,9 +130,9 @@ body:             |
     ; GCN: liveins: $agpr30, $agpr31, $agpr24_agpr25, $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s96) into %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORDX3_SADDR killed $vgpr0_vgpr1_vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s96) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $agpr29 = COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29
-    ; GCN-NEXT: $agpr26_agpr27_agpr28 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: ("amdgpu-non-volatile" load (s96) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $agpr26_agpr27_agpr28 = SCRATCH_LOAD_DWORDX3_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: ("amdgpu-thread-private" load (s96) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $agpr31, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -156,10 +156,10 @@ body:             |
     ; GCN-NEXT: {{  $}}
     ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s64) into %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORDX2_SADDR killed $vgpr0_vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s64) into %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: $agpr29 = COPY $agpr30, implicit-def $agpr26_agpr27_agpr28_agpr29
     ; GCN-NEXT: $agpr28 = COPY $agpr31, implicit $agpr26_agpr27_agpr28_agpr29
-    ; GCN-NEXT: $agpr26_agpr27 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: ("amdgpu-non-volatile" load (s64) from %stack.0, align 4, addrspace 5)
+    ; GCN-NEXT: $agpr26_agpr27 = SCRATCH_LOAD_DWORDX2_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: ("amdgpu-thread-private" load (s64) from %stack.0, align 4, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24_agpr25
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
@@ -184,11 +184,11 @@ body:             |
     ; GCN-NEXT: $agpr25 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr3, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $agpr30 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr2, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
     ; GCN-NEXT: $agpr31 = V_ACCVGPR_WRITE_B32_e64 killed $vgpr1, implicit $exec, implicit $vgpr0_vgpr1_vgpr2_vgpr3
-    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; GCN-NEXT: SCRATCH_STORE_DWORD_SADDR killed $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; GCN-NEXT: $agpr29 = COPY $agpr25, implicit-def $agpr26_agpr27_agpr28_agpr29
     ; GCN-NEXT: $agpr28 = COPY $agpr30, implicit $agpr26_agpr27_agpr28_agpr29
     ; GCN-NEXT: $agpr27 = COPY $agpr31, implicit $agpr26_agpr27_agpr28_agpr29
-    ; GCN-NEXT: $agpr26 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; GCN-NEXT: $agpr26 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit-def $agpr26_agpr27_agpr28_agpr29, implicit $agpr26_agpr27_agpr28_agpr29 :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; GCN-NEXT: S_ENDPGM 0, implicit $agpr0_agpr1_agpr2_agpr3_agpr4_agpr5_agpr6_agpr7_agpr8_agpr9_agpr10_agpr11_agpr12_agpr13_agpr14_agpr15, implicit $agpr16_agpr17_agpr18_agpr19_agpr20_agpr21_agpr22_agpr23, implicit $agpr24
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     $agpr26_agpr27_agpr28_agpr29 = SI_SPILL_A128_RESTORE %stack.0, $sgpr32, 0, implicit $exec :: (load (s128) from %stack.0, align 4, addrspace 5)
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
index 8b8d0a11941f7..d0a57cece191c 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill-scc-clobber.mir
@@ -28,7 +28,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; MUBUF-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -47,7 +47,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -65,7 +65,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -83,7 +83,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -127,8 +127,8 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; MUBUF-NEXT:   $vgpr2 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.1 + 4, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -147,7 +147,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_ADD_U32_e32 8200, $vgpr2, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.1, align 4, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.1, align 4, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -165,7 +165,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   $vgpr2 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.1, align 4, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.1, align 4, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -183,8 +183,8 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $vgpr2 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.1 + 4, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -226,7 +226,7 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; MUBUF-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -244,7 +244,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -261,7 +261,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -278,7 +278,7 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -320,8 +320,8 @@ body:             |
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; MUBUF-NEXT:   $vgpr2 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1 + 4, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -339,7 +339,7 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_ADD_U32_e32 8200, $vgpr2, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.1, align 4, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.1, align 4, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -356,7 +356,7 @@ body:             |
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   $vgpr2 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.1, align 4, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.1, align 4, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -373,8 +373,8 @@ body:             |
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
   ; VMEM-GFX8-NEXT:   $vgpr2 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1 + 4, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -416,10 +416,10 @@ body:             |
   ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; MUBUF-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -437,11 +437,11 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-  ; GFX9-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -459,10 +459,10 @@ body:             |
   ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-  ; GFX10-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -480,10 +480,10 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -528,11 +528,11 @@ body:             |
   ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; MUBUF-NEXT:   $vgpr2 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1 + 4, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -550,11 +550,11 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_ADD_U32_e32 8200, $vgpr2, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.1, align 4, addrspace 5)
-  ; GFX9-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.1, align 4, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -572,10 +572,10 @@ body:             |
   ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   $vgpr2 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s64) from %stack.1, align 4, addrspace 5)
-  ; GFX10-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr0_vgpr1 = SCRATCH_LOAD_DWORDX2 killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s64) from %stack.1, align 4, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -593,11 +593,11 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr2 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1 :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1 + 4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -642,12 +642,12 @@ body:             |
   ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; MUBUF-NEXT:   $vgpr3 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 8, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1 + 4, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1 + 8, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -665,11 +665,11 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   $vgpr3 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr3 = V_ADD_U32_e32 8200, $vgpr3, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.1, align 4, addrspace 5)
-  ; GFX9-FLATSCR-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s96) from %stack.1, align 4, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -687,10 +687,10 @@ body:             |
   ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   $vgpr3 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s96) from %stack.1, align 4, addrspace 5)
-  ; GFX10-FLATSCR-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr0_vgpr1_vgpr2 = SCRATCH_LOAD_DWORDX3 killed $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s96) from %stack.1, align 4, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -708,12 +708,12 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr3 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 4, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.1 + 8, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFEN $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1 + 4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.1 + 8, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -758,10 +758,10 @@ body:             |
   ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; MUBUF-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -779,11 +779,11 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e32 8200, $vgpr1, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; GFX9-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -801,10 +801,10 @@ body:             |
   ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   $vgpr1 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; GFX10-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD $vgpr0, killed $vgpr1, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -822,10 +822,10 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -870,11 +870,11 @@ body:             |
   ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; MUBUF-NEXT:   $vgpr2 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.1 + 4, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -892,11 +892,11 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr2 = V_ADD_U32_e32 8200, $vgpr2, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.1, align 4, addrspace 5)
-  ; GFX9-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.1, align 4, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -914,10 +914,10 @@ body:             |
   ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr2, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   $vgpr2 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s64) into %stack.1, align 4, addrspace 5)
-  ; GFX10-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX2 $vgpr0_vgpr1, killed $vgpr2, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s64) into %stack.1, align 4, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -935,11 +935,11 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr2 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.1 + 4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -984,12 +984,12 @@ body:             |
   ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; MUBUF-NEXT:   $vgpr3 = V_MOV_B32_e32 8200, implicit $exec
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr2, killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 8, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1 + 4, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr2, killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" store (s32) into %stack.1 + 8, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -1007,11 +1007,11 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   $vgpr3 = V_MOV_B32_e32 $sgpr32, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr3 = V_ADD_U32_e32 8200, $vgpr3, implicit $exec
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX3 $vgpr0_vgpr1_vgpr2, killed $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.1, align 4, addrspace 5)
-  ; GFX9-FLATSCR-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX3 $vgpr0_vgpr1_vgpr2, killed $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s96) into %stack.1, align 4, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -1029,10 +1029,10 @@ body:             |
   ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr3, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   $vgpr3 = V_ADD_U32_e64 $sgpr32, 8200, 0, implicit $exec
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX3 $vgpr0_vgpr1_vgpr2, killed $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s96) into %stack.1, align 4, addrspace 5)
-  ; GFX10-FLATSCR-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORDX3 $vgpr0_vgpr1_vgpr2, killed $vgpr3, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s96) into %stack.1, align 4, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -1050,12 +1050,12 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr3 = V_MOV_B32_e32 8200, implicit $exec
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 4, addrspace 5)
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr2, killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-non-volatile" store (s32) into %stack.1 + 8, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr0, $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr1, $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.1 + 4, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFEN $vgpr2, killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec, implicit $vgpr0_vgpr1_vgpr2 :: ("amdgpu-thread-private" store (s32) into %stack.1 + 8, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr3 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
@@ -1203,10 +1203,10 @@ body:             |
   ; MUBUF-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, $vgpr252_vgpr253_vgpr254_vgpr255
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; MUBUF-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; MUBUF-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; MUBUF-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; MUBUF-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; MUBUF-NEXT: {{  $}}
   ; MUBUF-NEXT: bb.1:
@@ -1224,13 +1224,13 @@ body:             |
   ; GFX9-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
   ; GFX9-FLATSCR-NEXT:   S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc
   ; GFX9-FLATSCR-NEXT:   $sgpr4 = S_BITSET0_B32 0, $sgpr4
   ; GFX9-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
   ; GFX9-FLATSCR-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; GFX9-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX9-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX9-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX9-FLATSCR-NEXT: {{  $}}
   ; GFX9-FLATSCR-NEXT: bb.1:
@@ -1248,13 +1248,13 @@ body:             |
   ; GFX10-FLATSCR-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   SCRATCH_STORE_DWORD_SADDR killed $vgpr1, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   $sgpr4 = S_ADDC_U32 $sgpr32, 8200, implicit-def $scc, implicit $scc
   ; GFX10-FLATSCR-NEXT:   S_BITCMP1_B32 $sgpr4, 0, implicit-def $scc
   ; GFX10-FLATSCR-NEXT:   $sgpr4 = S_BITSET0_B32 0, $sgpr4
   ; GFX10-FLATSCR-NEXT:   $vgpr1 = V_MOV_B32_e32 killed $sgpr4, implicit $exec
   ; GFX10-FLATSCR-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, 0, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; GFX10-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; GFX10-FLATSCR-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; GFX10-FLATSCR-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; GFX10-FLATSCR-NEXT: {{  $}}
   ; GFX10-FLATSCR-NEXT: bb.1:
@@ -1272,10 +1272,10 @@ body:             |
   ; VMEM-GFX8-NEXT:   liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15, $vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31, $vgpr32_vgpr33_vgpr34_vgpr35_vgpr36_vgpr37_vgpr38_vgpr39_vgpr40_vgpr41_vgpr42_vgpr43_vgpr44_vgpr45_vgpr46_vgpr47, $vgpr48_vgpr49_vgpr50_vgpr51_vgpr52_vgpr53_vgpr54_vgpr55_vgpr56_vgpr57_vgpr58_vgpr59_vgpr60_vgpr61_vgpr62_vgpr63, $vgpr64_vgpr65_vgpr66_vgpr67_vgpr68_vgpr69_vgpr70_vgpr71_vgpr72_vgpr73_vgpr74_vgpr75_vgpr76_vgpr77_vgpr78_vgpr79, $vgpr80_vgpr81_vgpr82_vgpr83_vgpr84_vgpr85_vgpr86_vgpr87_vgpr88_vgpr89_vgpr90_vgpr91_vgpr92_vgpr93_vgpr94_vgpr95, $vgpr96_vgpr97_vgpr98_vgpr99_vgpr100_vgpr101_vgpr102_vgpr103_vgpr104_vgpr105_vgpr106_vgpr107_vgpr108_vgpr109_vgpr110_vgpr111, $vgpr112_vgpr113_vgpr114_vgpr115_vgpr116_vgpr117_vgpr118_vgpr119_vgpr120_vgpr121_vgpr122_vgpr123_vgpr124_vgpr125_vgpr126_vgpr127, $vgpr128_vgpr129_vgpr130_vgpr131_vgpr132_vgpr133_vgpr134_vgpr135_vgpr136_vgpr137_vgpr138_vgpr139_vgpr140_vgpr141_vgpr142_vgpr143, $vgpr144_vgpr145_vgpr146_vgpr147_vgpr148_vgpr149_vgpr150_vgpr151_vgpr152_vgpr153_vgpr154_vgpr155_vgpr156_vgpr157_vgpr158_vgpr159, $vgpr160_vgpr161_vgpr162_vgpr163_vgpr164_vgpr165_vgpr166_vgpr167_vgpr168_vgpr169_vgpr170_vgpr171_vgpr172_vgpr173_vgpr174_vgpr175, $vgpr176_vgpr177_vgpr178_vgpr179_vgpr180_vgpr181_vgpr182_vgpr183_vgpr184_vgpr185_vgpr186_vgpr187_vgpr188_vgpr189_vgpr190_vgpr191, $vgpr192_vgpr193_vgpr194_vgpr195_vgpr196_vgpr197_vgpr198_vgpr199_vgpr200_vgpr201_vgpr202_vgpr203_vgpr204_vgpr205_vgpr206_vgpr207, $vgpr208_vgpr209_vgpr210_vgpr211_vgpr212_vgpr213_vgpr214_vgpr215_vgpr216_vgpr217_vgpr218_vgpr219_vgpr220_vgpr221_vgpr222_vgpr223, $vgpr224_vgpr225_vgpr226_vgpr227_vgpr228_vgpr229_vgpr230_vgpr231_vgpr232_vgpr233_vgpr234_vgpr235_vgpr236_vgpr237_vgpr238_vgpr239, $vgpr240_vgpr241_vgpr242_vgpr243_vgpr244_vgpr245_vgpr246_vgpr247, $vgpr248_vgpr249_vgpr250_vgpr251, 
$vgpr252_vgpr253_vgpr254_vgpr255
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT:   S_CMP_EQ_U32 0, 0, implicit-def $scc
-  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   $vgpr1 = V_MOV_B32_e32 8200, implicit $exec
   ; VMEM-GFX8-NEXT:   $vgpr0 = BUFFER_LOAD_DWORD_OFFEN killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
-  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+  ; VMEM-GFX8-NEXT:   $vgpr1 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
   ; VMEM-GFX8-NEXT:   S_CBRANCH_SCC1 %bb.2, implicit $scc
   ; VMEM-GFX8-NEXT: {{  $}}
   ; VMEM-GFX8-NEXT: bb.1:
diff --git a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
index 4fdd4bac39898..93fc2857d5092 100644
--- a/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
+++ b/llvm/test/CodeGen/AMDGPU/vgpr-spill.mir
@@ -17,7 +17,7 @@ body:             |
     ; CHECK-LABEL: name: spill_v32
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: S_NOP 0, implicit $vgpr0
     SI_SPILL_V32_SAVE $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
     S_NOP 0, implicit $vgpr0
@@ -39,7 +39,7 @@ body:             |
     ; CHECK-LABEL: name: spill_v32_kill
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr32, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
 ...
 
@@ -59,8 +59,8 @@ body:             |
     ; CHECK-LABEL: name: spill_v64
     ; CHECK: liveins: $vgpr0_vgpr1
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     ; CHECK-NEXT: S_NOP 0, implicit $vgpr0_vgpr1
     SI_SPILL_V64_SAVE $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
     S_NOP 0, implicit $vgpr0_vgpr1
@@ -82,8 +82,8 @@ body:             |
     ; CHECK-LABEL: name: spill_v64_kill
     ; CHECK: liveins: $vgpr0_vgpr1
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
 ...
 
@@ -105,8 +105,8 @@ body:             |
     ; CHECK-LABEL: name: spill_v64_undef_sub1_killed
     ; CHECK: liveins: $vgpr0
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
 ...
 
@@ -126,8 +126,8 @@ body:             |
     ; CHECK-LABEL: name: spill_v64_undef_sub0_killed
     ; CHECK: liveins: $vgpr1
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1, implicit $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
     SI_SPILL_V64_SAVE killed $vgpr0_vgpr1, %stack.0, $sgpr32, 0, implicit $exec :: (store (s64) into %stack.0, addrspace 5)
 ...
 
@@ -147,10 +147,10 @@ body:             |
     ; CHECK-LABEL: name: spill_v128_kill
     ; CHECK: liveins: $vgpr0_vgpr1_vgpr2_vgpr3
     ; CHECK-NEXT: {{  $}}
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 4, addrspace 5)
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 8, addrspace 5)
-    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-non-volatile" store (s32) into %stack.0 + 12, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit-def $vgpr0_vgpr1_vgpr2_vgpr3, implicit $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr1, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 4, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 8, 0, 0, implicit $exec :: ("amdgpu-thread-private" store (s32) into %stack.0 + 8, addrspace 5)
+    ; CHECK-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr3, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 12, 0, 0, implicit $exec, implicit killed $vgpr0_vgpr1_vgpr2_vgpr3 :: ("amdgpu-thread-private" store (s32) into %stack.0 + 12, addrspace 5)
     SI_SPILL_V128_SAVE killed $vgpr0_vgpr1_vgpr2_vgpr3, %stack.0, $sgpr32, 0, implicit $exec :: (store (s128) into %stack.0, addrspace 5)
 ...
 
diff --git a/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir b/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir
index 405622c4bbf6d..25bff3f693f21 100644
--- a/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir
+++ b/llvm/test/CodeGen/AMDGPU/whole-wave-functions-pei.mir
@@ -27,11 +27,11 @@ body:             |
     ; CHECK: liveins: $vgpr0, $vgpr0
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1
     ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 14, implicit $exec
     ; CHECK-NEXT: $exec_lo = S_XOR_B32 $sgpr0, -1, implicit-def $scc
-    ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
     ; CHECK-NEXT: SI_RETURN implicit killed $vgpr0
     renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
@@ -65,9 +65,9 @@ body:             |
     ; CHECK: liveins: $vgpr40
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $sgpr0 = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: $vgpr40 = V_MOV_B32_e32 14, implicit $exec
-    ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
     ; CHECK-NEXT: SI_RETURN
     renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
@@ -102,13 +102,13 @@ body:             |
     ; CHECK: liveins: $sgpr20, $vgpr191, $vgpr192
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vcc_lo = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr192, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr192, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1
     ; CHECK-NEXT: $vgpr192 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr192
     ; CHECK-NEXT: $sgpr20 = S_MOV_B32 14, implicit $exec
     ; CHECK-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr192, 0
     ; CHECK-NEXT: $exec_lo = S_XOR_B32 $vcc_lo, -1, implicit-def $scc
-    ; CHECK-NEXT: $vgpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr192 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo
     ; CHECK-NEXT: SI_RETURN
     $vgpr192 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr192
@@ -145,11 +145,11 @@ body:             |
     ; CHECK: liveins: $sgpr20, $vgpr191
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vcc_lo = S_OR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr191, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr191, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
     ; CHECK-NEXT: $vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191
     ; CHECK-NEXT: $sgpr20 = S_MOV_B32 14, implicit $exec
     ; CHECK-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr191, 0
-    ; CHECK-NEXT: $vgpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr191 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo
     ; CHECK-NEXT: SI_RETURN
     $vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191
@@ -194,18 +194,18 @@ body:             |
     ; CHECK: liveins: $sgpr20, $vgpr0, $vgpr1, $vgpr40, $vgpr49, $vgpr49
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vcc_lo = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr49, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr49, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
     ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $sgpr20
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr49, implicit-def $sgpr40
     ; CHECK-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
-    ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_XOR_B32 $vcc_lo, -1, implicit-def $scc
-    ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; CHECK-NEXT: $vgpr49 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr49 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $vcc_lo
     ; CHECK-NEXT: SI_RETURN
     $vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191
@@ -251,19 +251,19 @@ body:             |
     ; CHECK: liveins: $sgpr20, $vgpr0, $vgpr1, $vgpr40, $vgpr49, $vgpr49
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $vcc_lo = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr49, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr49, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
     ; CHECK-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr40, implicit-def $sgpr20
     ; CHECK-NEXT: $sgpr3 = COPY $vcc_lo
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr49, implicit-def $sgpr40
     ; CHECK-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr0, 0
-    ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+    ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_XOR_B32 $sgpr3, -1, implicit-def $scc
-    ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; CHECK-NEXT: $vgpr49 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr49 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr3
     ; CHECK-NEXT: SI_RETURN
     $vgpr191 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr191
@@ -301,26 +301,26 @@ body:             |
     ; CHECK: liveins: $vgpr0, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr40, $vgpr41, $vgpr42, $vgpr0, $vgpr2, $vgpr3, $vgpr4, $vgpr5
     ; CHECK-NEXT: {{  $}}
     ; CHECK-NEXT: $sgpr0 = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.2, addrspace 5)
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr4, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.3, addrspace 5)
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.4, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr2, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr3, $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.2, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr4, $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.3, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr5, $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.4, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 -1
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.5, addrspace 5)
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr41, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.6, addrspace 5)
-    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr42, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.7, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr40, $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.5, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr41, $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.6, addrspace 5)
+    ; CHECK-NEXT: SCRATCH_STORE_DWORD_SADDR $vgpr42, $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.7, addrspace 5)
     ; CHECK-NEXT: $vgpr0 = V_MOV_B32_e32 14, implicit $exec
     ; CHECK-NEXT: S_NOP 0, implicit-def $vgpr2_vgpr3_vgpr4_vgpr5, implicit-def $vgpr40_vgpr41_vgpr42
-    ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.5, addrspace 5)
-    ; CHECK-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.6, addrspace 5)
-    ; CHECK-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.7, addrspace 5)
+    ; CHECK-NEXT: $vgpr40 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 20, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.5, addrspace 5)
+    ; CHECK-NEXT: $vgpr41 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 24, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.6, addrspace 5)
+    ; CHECK-NEXT: $vgpr42 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 28, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.7, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_XOR_B32 $sgpr0, -1, implicit-def $scc
-    ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-    ; CHECK-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
-    ; CHECK-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.2, addrspace 5)
-    ; CHECK-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.3, addrspace 5)
-    ; CHECK-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.4, addrspace 5)
+    ; CHECK-NEXT: $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+    ; CHECK-NEXT: $vgpr2 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
+    ; CHECK-NEXT: $vgpr3 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 8, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.2, addrspace 5)
+    ; CHECK-NEXT: $vgpr4 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 12, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.3, addrspace 5)
+    ; CHECK-NEXT: $vgpr5 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 16, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.4, addrspace 5)
     ; CHECK-NEXT: $exec_lo = S_MOV_B32 $sgpr0
     ; CHECK-NEXT: SI_RETURN implicit killed $vgpr0
     renamable $sgpr0 = SI_WHOLE_WAVE_FUNC_SETUP implicit-def dead $exec, implicit $exec
@@ -399,8 +399,8 @@ body:             |
   ; CHECK-NEXT:   liveins: $vgpr0, $vgpr1, $vgpr0, $vgpr1
   ; CHECK-NEXT: {{  $}}
   ; CHECK-NEXT:   $vcc_lo = S_XOR_SAVEEXEC_B32 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
-  ; CHECK-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.0, addrspace 5)
-  ; CHECK-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" store (s32) into %stack.1, addrspace 5)
+  ; CHECK-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr0, $sgpr32, 0, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.0, addrspace 5)
+  ; CHECK-NEXT:   SCRATCH_STORE_DWORD_SADDR $vgpr1, $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" store (s32) into %stack.1, addrspace 5)
   ; CHECK-NEXT:   $exec_lo = S_MOV_B32 -1
   ; CHECK-NEXT:   $sgpr1 = S_MOV_B32 $exec_lo
   ; CHECK-NEXT:   V_CMPX_EQ_U32_nosdst_e64 $vgpr0, $vgpr1, implicit-def $exec, implicit $exec
@@ -419,8 +419,8 @@ body:             |
   ; CHECK-NEXT:   $exec_lo = S_OR_B32 $exec_lo, killed renamable $sgpr1, implicit-def $scc
   ; CHECK-NEXT:   renamable $vgpr0 = V_CNDMASK_B32_e64 0, $vgpr1, 0, $vgpr0, $vcc_lo, implicit $exec
   ; CHECK-NEXT:   $exec_lo = S_XOR_B32 $vcc_lo, -1, implicit-def $scc
-  ; CHECK-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: ("amdgpu-non-volatile" load (s32) from %stack.0, addrspace 5)
-  ; CHECK-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-non-volatile" load (s32) from %stack.1, addrspace 5)
+  ; CHECK-NEXT:   $vgpr0 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 0, 0, implicit $exec, implicit $flat_scr, implicit $vgpr0(tied-def 0) :: ("amdgpu-thread-private" load (s32) from %stack.0, addrspace 5)
+  ; CHECK-NEXT:   $vgpr1 = SCRATCH_LOAD_DWORD_SADDR $sgpr32, 4, 0, implicit $exec, implicit $flat_scr :: ("amdgpu-thread-private" load (s32) from %stack.1, addrspace 5)
   ; CHECK-NEXT:   $exec_lo = S_MOV_B32 $vcc_lo
   ; CHECK-NEXT:   SI_RETURN implicit $vgpr0
   bb.0:



More information about the llvm-branch-commits mailing list